[glib/wip/tingping/guri-normalize] guri: Normalize uri segments if they are encoded
- From: Patrick Griffis <pgriffis src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [glib/wip/tingping/guri-normalize] guri: Normalize uri segments if they are encoded
- Date: Wed, 14 Oct 2020 19:49:20 +0000 (UTC)
commit fe3603604f069ad90f0fa8dc1597e9452670f41c
Author: Patrick Griffis <pgriffis igalia com>
Date: Wed Oct 14 14:22:58 2020 -0500
guri: Normalize uri segments if they are encoded
This changes it so that when a segment is encoded, it is
normalized at parse time, which ensures it's valid and
lets it be compared more easily with other URIs.
glib/guri.c | 42 +++++++++++++++++++++++++-----------------
glib/tests/uri.c | 33 +++++++++++++++++++++++++++++++++
2 files changed, 58 insertions(+), 17 deletions(-)
---
diff --git a/glib/guri.c b/glib/guri.c
index f04139b80..02d506432 100644
--- a/glib/guri.c
+++ b/glib/guri.c
@@ -289,15 +289,16 @@ uri_decoder (gchar **out,
GUriError parse_error,
GError **error)
{
- gchar *decoded, *d, c;
+ gchar c;
+ GString *decoded;
const gchar *invalid, *s, *end;
gssize len;
if (!(flags & G_URI_FLAGS_ENCODED))
just_normalize = FALSE;
- decoded = g_malloc (length + 1);
- for (s = start, end = s + length, d = decoded; s < end; s++)
+ decoded = g_string_sized_new (length + 1);
+ for (s = start, end = s + length; s < end; s++)
{
if (*s == '%')
{
@@ -311,7 +312,7 @@ uri_decoder (gchar **out,
g_set_error_literal (error, G_URI_ERROR, parse_error,
/* xgettext: no-c-format */
_("Invalid %-encoding in URI"));
- g_free (decoded);
+ g_string_free (decoded, TRUE);
return -1;
}
@@ -319,7 +320,7 @@ uri_decoder (gchar **out,
* fix it to "%25", since that might change the way that
* the URI's owner would interpret it.
*/
- *d++ = *s;
+ g_string_append_c (decoded, *s);
continue;
}
@@ -328,43 +329,50 @@ uri_decoder (gchar **out,
{
g_set_error_literal (error, G_URI_ERROR, parse_error,
_("Illegal character in URI"));
- g_free (decoded);
+ g_string_free (decoded, TRUE);
return -1;
}
if (just_normalize && !g_uri_char_is_unreserved (c))
{
- /* Leave the % sequence there. */
- *d++ = *s;
+ /* Leave the % sequence there but normalize it. */
+ g_string_append_c (decoded, *s);
+ g_string_append_c (decoded, g_ascii_toupper (s[1]));
+ g_string_append_c (decoded, g_ascii_toupper (s[2]));
+ s += 2;
}
else
{
- *d++ = c;
+ g_string_append_c (decoded, c);
s += 2;
}
}
else if (www_form && *s == '+')
- *d++ = ' ';
+ g_string_append_c (decoded, ' ');
+ /* Normalize any illegal characters */
+ else if (just_normalize && (!g_ascii_isgraph (*s) ||
+ (illegal_chars && strchr (illegal_chars, *s))))
+ g_string_append_printf (decoded, "%%%02X", (int)*s);
else
- *d++ = *s;
+ g_string_append_c (decoded, *s);
}
- *d = '\0';
- len = d - decoded;
+ len = decoded->len;
g_assert (len >= 0);
if (!(flags & G_URI_FLAGS_ENCODED) &&
- !g_utf8_validate (decoded, len, &invalid))
+ !g_utf8_validate (decoded->str, len, &invalid))
{
g_set_error_literal (error, G_URI_ERROR, parse_error,
_("Non-UTF-8 characters in URI"));
- g_free (decoded);
+ g_string_free (decoded, TRUE);
return -1;
}
if (out)
- *out = g_steal_pointer (&decoded);
+ *out = g_string_free (decoded, FALSE);
+ else
+ g_string_free (decoded, TRUE);
- g_free (decoded);
return len;
}
diff --git a/glib/tests/uri.c b/glib/tests/uri.c
index b8a0c6a47..e769a9ea5 100644
--- a/glib/tests/uri.c
+++ b/glib/tests/uri.c
@@ -1708,6 +1708,38 @@ test_uri_join_split_round_trip (void)
}
}
+static const struct
+{
+ /* Inputs */
+ const gchar *uri;
+ GUriFlags flags;
+ /* Outputs */
+ const gchar *path;
+} normalize_tests[] =
+ {
+ { "http://foo/path with spaces", G_URI_FLAGS_ENCODED,
+ "/path%20with%20spaces" },
+ { "http://foo/path with spaces 2", G_URI_FLAGS_ENCODED_PATH,
+ "/path%20with%20spaces%202" },
+ { "http://foo/%aa", G_URI_FLAGS_ENCODED,
+ "/%AA" },
+ { "http://foo/%☺", G_URI_FLAGS_ENCODED | G_URI_FLAGS_PARSE_RELAXED,
+ "/%%FFFFFFE2%FFFFFF98%FFFFFFBA" },
+ };
+
+static void
+test_uri_normalize (void)
+{
+ for (gsize i = 0; i < G_N_ELEMENTS (normalize_tests); ++i)
+ {
+ GUri *uri = g_uri_parse (normalize_tests[i].uri,
+ normalize_tests[i].flags,
+ NULL);
+ g_assert_nonnull (uri);
+ g_assert_cmpstr (g_uri_get_path (uri), ==, normalize_tests[i].path);
+ }
+}
+
int
main (int argc,
char *argv[])
@@ -1733,6 +1765,7 @@ main (int argc,
g_test_add_func ("/uri/to-string", test_uri_to_string);
g_test_add_func ("/uri/join", test_uri_join);
g_test_add_func ("/uri/join-split-round-trip", test_uri_join_split_round_trip);
+ g_test_add_func ("/uri/normalize", test_uri_normalize);
g_test_add_data_func ("/uri/iter-params/nul-terminated", GINT_TO_POINTER (TRUE), test_uri_iter_params);
g_test_add_data_func ("/uri/iter-params/length", GINT_TO_POINTER (FALSE), test_uri_iter_params);
g_test_add_data_func ("/uri/parse-params/nul-terminated", GINT_TO_POINTER (TRUE), test_uri_parse_params);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]