[glib/wip/tingping/guri-normalize: 1/2] guri: Add G_URI_FLAGS_SCHEME_NORMALIZE This flag enables optional scheme-defined normalization during parsing of a URI.
- From: Patrick Griffis <pgriffis src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [glib/wip/tingping/guri-normalize: 1/2] guri: Add G_URI_FLAGS_SCHEME_NORMALIZE This flag enables optional scheme-defined normalization during parsing of a URI.
- Date: Mon, 26 Oct 2020 19:41:57 +0000 (UTC)
commit 25073a78af080cb1f2e945e94810b262a80ecff2
Author: Patrick Griffis <pgriffis igalia com>
Date: Fri Oct 23 14:36:54 2020 -0500
guri: Add G_URI_FLAGS_SCHEME_NORMALIZE
This flag enables optional scheme-defined normalization
during parsing of a URI.
glib/guri.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
glib/guri.h | 5 ++++
glib/tests/uri.c | 49 +++++++++++++++++++++++++++--------
3 files changed, 120 insertions(+), 12 deletions(-)
---
diff --git a/glib/guri.c b/glib/guri.c
index 520117ad1..d78503a6c 100644
--- a/glib/guri.c
+++ b/glib/guri.c
@@ -748,6 +748,50 @@ uri_cleanup (const gchar *uri_string)
return g_string_free (copy, FALSE);
}
+/*
+ * should_normalize_empty_path:
+ * @scheme: a URI scheme, compared case-sensitively; must not be %NULL
+ *          because it is passed straight to strcmp()
+ *
+ * Checks whether @scheme is one of "https", "http", "wss" or "ws".
+ *
+ * Returns: %TRUE if @scheme is in that list, %FALSE otherwise
+ */
+static gboolean
+should_normalize_empty_path (const char *scheme)
+{
+  static const char * const schemes[] = { "https", "http", "wss", "ws" };
+
+  /* gsize matches the unsigned result of G_N_ELEMENTS(), so the loop
+   * condition does not compare a signed index with an unsigned count. */
+  for (gsize i = 0; i < G_N_ELEMENTS (schemes); ++i)
+    {
+      if (!strcmp (schemes[i], scheme))
+        return TRUE;
+    }
+
+  return FALSE;
+}
+
+/*
+ * normalize_port:
+ * @scheme: a URI scheme, compared case-sensitively; must not be %NULL
+ *          because it is passed straight to strcmp()
+ * @port: a port number
+ *
+ * Maps @port to -1 when it is the default port of @scheme: 21 for "ftp",
+ * 80 for "http" and "ws", 443 for "https" and "wss".
+ *
+ * Returns: -1 if @port is the default port of @scheme, otherwise @port
+ *          unchanged
+ */
+static int
+normalize_port (const char *scheme,
+                int         port)
+{
+  /* The entries point at string literals, hence const. At most two slots
+   * are ever filled, so the last one stays NULL and terminates the loop
+   * below. */
+  const char *default_schemes[3] = { NULL };
+
+  switch (port)
+    {
+    case 21:
+      default_schemes[0] = "ftp";
+      break;
+    case 80:
+      default_schemes[0] = "http";
+      default_schemes[1] = "ws";
+      break;
+    case 443:
+      default_schemes[0] = "https";
+      default_schemes[1] = "wss";
+      break;
+    default:
+      break;
+    }
+
+  for (gsize i = 0; default_schemes[i]; ++i)
+    {
+      if (!strcmp (scheme, default_schemes[i]))
+        return -1;
+    }
+
+  return port;
+}
+
static gboolean
g_uri_split_internal (const gchar *uri_string,
GUriFlags flags,
@@ -766,6 +810,7 @@ g_uri_split_internal (const gchar *uri_string,
const gchar *end, *colon, *at, *path_start, *semi, *question;
const gchar *p, *bracket, *hostend;
gchar *cleaned_uri_string = NULL;
+ gchar *normalized_scheme = NULL;
if (scheme)
*scheme = NULL;
@@ -803,8 +848,9 @@ g_uri_split_internal (const gchar *uri_string,
if (p > uri_string && *p == ':')
{
+ normalized_scheme = g_ascii_strdown (uri_string, p - uri_string);
if (scheme)
- *scheme = g_ascii_strdown (uri_string, p - uri_string);
+ *scheme = g_steal_pointer (&normalized_scheme);
p++;
}
else
@@ -930,6 +976,22 @@ g_uri_split_internal (const gchar *uri_string,
G_URI_ERROR_BAD_PATH, error))
goto fail;
+ /* Scheme-based normalization */
+ if (flags & G_URI_FLAGS_SCHEME_NORMALIZE && ((scheme && *scheme) || normalized_scheme))
+ {
+ const char *scheme_str = scheme && *scheme ? *scheme : normalized_scheme;
+
+ if (should_normalize_empty_path (scheme_str) && path && !**path)
+ {
+ g_free (*path);
+ *path = g_strdup ("/");
+ }
+
+ if (port && *port != -1)
+ *port = normalize_port (scheme_str, *port);
+ }
+
+ g_free (normalized_scheme);
g_free (cleaned_uri_string);
return TRUE;
@@ -949,6 +1011,7 @@ g_uri_split_internal (const gchar *uri_string,
if (fragment)
g_clear_pointer (fragment, g_free);
+ g_free (normalized_scheme);
g_free (cleaned_uri_string);
return FALSE;
}
@@ -1402,6 +1465,19 @@ g_uri_parse_relative (GUri *base_uri,
uri->port = base_uri->port;
}
}
+
+ /* Scheme normalization couldn't have been done earlier
+ as the relative URI may not have had a scheme of its own */
+ if (flags & G_URI_FLAGS_SCHEME_NORMALIZE)
+ {
+ if (should_normalize_empty_path (uri->scheme) && !*uri->path)
+ {
+ g_free (uri->path);
+ uri->path = g_strdup ("/");
+ }
+
+ uri->port = normalize_port (uri->scheme, uri->port);
+ }
}
return g_steal_pointer (&uri);
diff --git a/glib/guri.h b/glib/guri.h
index 3a7bb5c0e..587848766 100644
--- a/glib/guri.h
+++ b/glib/guri.h
@@ -62,6 +62,10 @@ void g_uri_unref (GUri *uri);
* @G_URI_FLAGS_ENCODED_PATH: Same as %G_URI_FLAGS_ENCODED, for the path only.
* @G_URI_FLAGS_ENCODED_FRAGMENT: Same as %G_URI_FLAGS_ENCODED, for the
* fragment only.
+ * @G_URI_FLAGS_SCHEME_NORMALIZE: Applies scheme-based normalization to the
+ * parsed URI. For example, when parsing an HTTP URI, an empty path is
+ * changed to `/` and the default port `80` is changed to `-1`. Only a
+ * subset of known schemes is supported.
*
* Flags that describe a URI.
*
@@ -83,6 +87,7 @@ typedef enum {
G_URI_FLAGS_ENCODED_QUERY = 1 << 5,
G_URI_FLAGS_ENCODED_PATH = 1 << 6,
G_URI_FLAGS_ENCODED_FRAGMENT = 1 << 7,
+ G_URI_FLAGS_SCHEME_NORMALIZE = 1 << 8,
} GUriFlags;
GLIB_AVAILABLE_IN_2_66
diff --git a/glib/tests/uri.c b/glib/tests/uri.c
index 758a11484..2ee6d410b 100644
--- a/glib/tests/uri.c
+++ b/glib/tests/uri.c
@@ -1711,20 +1711,40 @@ test_uri_join_split_round_trip (void)
static const struct
{
/* Inputs */
+ const gchar *base; /* base URI the test parses first, or NULL for none */
const gchar *uri;
GUriFlags flags;
/* Outputs */
const gchar *path;
+ int port; /* expected g_uri_get_port() result */
} normalize_tests[] =
{
- { "http://foo/path with spaces", G_URI_FLAGS_ENCODED,
- "/path%20with%20spaces" },
- { "http://foo/path with spaces 2", G_URI_FLAGS_ENCODED_PATH,
- "/path%20with%20spaces%202" },
- { "http://foo/%aa", G_URI_FLAGS_ENCODED,
- "/%AA" },
- { "http://foo/p\xc3\xa4th/", G_URI_FLAGS_ENCODED | G_URI_FLAGS_PARSE_RELAXED,
- "/p%C3%A4th/" },
+ { NULL, "http://foo/path with spaces", G_URI_FLAGS_ENCODED,
+ "/path%20with%20spaces", -1 },
+ { NULL, "http://foo/path with spaces 2", G_URI_FLAGS_ENCODED_PATH,
+ "/path%20with%20spaces%202", -1 },
+ { NULL, "http://foo/%aa", G_URI_FLAGS_ENCODED,
+ "/%AA", -1 },
+ { NULL, "http://foo/p\xc3\xa4th/", G_URI_FLAGS_ENCODED | G_URI_FLAGS_PARSE_RELAXED,
+ "/p%C3%A4th/", -1 },
+ { NULL, "http://foo", G_URI_FLAGS_SCHEME_NORMALIZE,
+ "/", -1 },
+ { NULL, "nothttp://foo", G_URI_FLAGS_SCHEME_NORMALIZE,
+ "", -1 },
+ { NULL, "http://foo:80", G_URI_FLAGS_SCHEME_NORMALIZE,
+ "/", -1 },
+ { NULL, "ftp://foo:21", G_URI_FLAGS_SCHEME_NORMALIZE,
+ "", -1 },
+ { NULL, "nothttp://foo:80", G_URI_FLAGS_SCHEME_NORMALIZE,
+ "", 80 },
+ { "http://foo", "//bar", G_URI_FLAGS_SCHEME_NORMALIZE,
+ "/", -1 },
+ { "http://foo", "//bar:80", G_URI_FLAGS_SCHEME_NORMALIZE,
+ "/", -1 },
+ { "nothttp://foo", "//bar:80", G_URI_FLAGS_SCHEME_NORMALIZE,
+ "", 80 },
+ { "http://foo", "//bar", 0,
+ "", -1 },
};
static void
@@ -1732,11 +1752,24 @@ test_uri_normalize (void)
{
for (gsize i = 0; i < G_N_ELEMENTS (normalize_tests); ++i)
{
- GUri *uri = g_uri_parse (normalize_tests[i].uri,
- normalize_tests[i].flags,
- NULL);
+ /* Parse the optional base, resolve the test URI against it, then check path and port */
+ GUri *uri, *base = NULL;
+ if (normalize_tests[i].base)
+ base = g_uri_parse (normalize_tests[i].base, normalize_tests[i].flags, NULL);
+
+ uri = g_uri_parse_relative (base,
+ normalize_tests[i].uri,
+ normalize_tests[i].flags,
+ NULL);
+
g_assert_nonnull (uri);
g_assert_cmpstr (g_uri_get_path (uri), ==, normalize_tests[i].path);
+ g_assert_cmpint (g_uri_get_port (uri), ==, normalize_tests[i].port);
+
+ /* Drop our references so the loop does not leak a GUri per test case */
+ g_uri_unref (uri);
+ if (base)
+ g_uri_unref (base);
}
}
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]