[glib/wip/tingping/guri-normalize] guri: Add G_URI_FLAGS_SCHEME_NORMALIZE This flag enables optional scheme-defined normalization during parsing of a URI.




commit e196a6bc6e154d6f06d5d764282dc6b2f0146e7b
Author: Patrick Griffis <pgriffis igalia com>
Date:   Fri Oct 23 14:36:54 2020 -0500

    guri: Add G_URI_FLAGS_SCHEME_NORMALIZE
    This flag enables optional scheme-defined normalization
    during parsing of a URI.

 glib/guri.c      | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 glib/guri.h      |  5 +++++
 glib/tests/uri.c | 20 +++++++++++++----
 3 files changed, 85 insertions(+), 5 deletions(-)
---
diff --git a/glib/guri.c b/glib/guri.c
index 02d506432..7c9120980 100644
--- a/glib/guri.c
+++ b/glib/guri.c
@@ -748,6 +748,50 @@ uri_cleanup (const gchar *uri_string)
   return g_string_free (copy, FALSE);
 }
 
+/* Returns TRUE if @scheme is exactly "http", "https", "ws" or "wss".
+ * The match is case-sensitive, so pass a lowercased, non-NULL scheme. */
+static gboolean
+scheme_is_http_like (const char *scheme)
+{
+  static const char * const schemes[] = { "https", "http", "wss", "ws" };
+  for (gsize i = 0; i < G_N_ELEMENTS (schemes); ++i)
+    if (strcmp (schemes[i], scheme) == 0)
+      return TRUE;
+  return FALSE;
+}
+
+/* Returns -1 if @port is the default port of @scheme (ftp: 21, http/ws: 80,
+ * https/wss: 443); otherwise returns @port unchanged. @scheme must be
+ * non-NULL and lowercase, since it is compared with strcmp(). */
+static int
+normalize_port (const char *scheme,
+                int         port)
+{
+  const char *default_schemes[3] = { NULL };
+
+  switch (port)
+    {
+    case 21:
+      default_schemes[0] = "ftp";
+      break;
+    case 80:
+      default_schemes[0] = "http";
+      default_schemes[1] = "ws";
+      break;
+    case 443:
+      default_schemes[0] = "https";
+      default_schemes[1] = "wss";
+      break;
+    default:
+      break;
+    }
+
+  for (gsize i = 0; default_schemes[i] != NULL; ++i)
+    if (strcmp (scheme, default_schemes[i]) == 0)
+      return -1;
+  return port;
+}
+
 static gboolean
 g_uri_split_internal (const gchar  *uri_string,
                       GUriFlags     flags,
@@ -766,6 +810,7 @@ g_uri_split_internal (const gchar  *uri_string,
   const gchar *end, *colon, *at, *path_start, *semi, *question;
   const gchar *p, *bracket, *hostend;
   gchar *cleaned_uri_string = NULL;
+  gchar *normalized_scheme = NULL;
 
   if (scheme)
     *scheme = NULL;
@@ -803,8 +848,9 @@ g_uri_split_internal (const gchar  *uri_string,
 
   if (p > uri_string && *p == ':')
     {
+      normalized_scheme = g_ascii_strdown (uri_string, p - uri_string);
       if (scheme)
-        *scheme = g_ascii_strdown (uri_string, p - uri_string);
+        *scheme = g_steal_pointer (&normalized_scheme);
       p++;
     }
   else
@@ -930,6 +976,22 @@ g_uri_split_internal (const gchar  *uri_string,
                       G_URI_ERROR_BAD_PATH, error))
     goto fail;
 
+  /* Scheme-based normalization */
+  if (flags & G_URI_FLAGS_SCHEME_NORMALIZE && ((scheme && *scheme) || normalized_scheme))
+    {
+      const char *scheme_str = scheme && *scheme ? *scheme : normalized_scheme;
+
+      if (scheme_is_http_like (scheme_str) && path && !**path)
+        {
+          g_free (*path);
+          *path = g_strdup ("/");
+        }
+
+      if (port && *port != -1)
+        *port = normalize_port (scheme_str, *port);
+    }
+
+  g_free (normalized_scheme);
   g_free (cleaned_uri_string);
   return TRUE;
 
@@ -949,6 +1011,7 @@ g_uri_split_internal (const gchar  *uri_string,
   if (fragment)
     g_clear_pointer (fragment, g_free);
 
+  g_free (normalized_scheme);
   g_free (cleaned_uri_string);
   return FALSE;
 }
diff --git a/glib/guri.h b/glib/guri.h
index 3a7bb5c0e..587848766 100644
--- a/glib/guri.h
+++ b/glib/guri.h
@@ -62,6 +62,10 @@ void         g_uri_unref            (GUri *uri);
  * @G_URI_FLAGS_ENCODED_PATH: Same as %G_URI_FLAGS_ENCODED, for the path only.
  * @G_URI_FLAGS_ENCODED_FRAGMENT: Same as %G_URI_FLAGS_ENCODED, for the
  *     fragment only.
+ * @G_URI_FLAGS_SCHEME_NORMALIZE: Applies scheme-based normalization to the
+ *     parsed URI. For example, when parsing an HTTP URI, an empty path is
+ *     changed to `/` and port `80` is changed to `-1`. Only a subset of
+ *     known schemes is supported.
  *
  * Flags that describe a URI.
  *
@@ -83,6 +87,7 @@ typedef enum {
   G_URI_FLAGS_ENCODED_QUERY   = 1 << 5,
   G_URI_FLAGS_ENCODED_PATH    = 1 << 6,
   G_URI_FLAGS_ENCODED_FRAGMENT = 1 << 7,
+  G_URI_FLAGS_SCHEME_NORMALIZE = 1 << 8,
 } GUriFlags;
 
 GLIB_AVAILABLE_IN_2_66
diff --git a/glib/tests/uri.c b/glib/tests/uri.c
index e769a9ea5..492e6d268 100644
--- a/glib/tests/uri.c
+++ b/glib/tests/uri.c
@@ -1715,16 +1715,27 @@ static const struct
   GUriFlags flags;
   /* Outputs */
   const gchar *path;
+  int port;
 } normalize_tests[] =
   {
     { "http://foo/path with spaces", G_URI_FLAGS_ENCODED,
-      "/path%20with%20spaces" },
+      "/path%20with%20spaces", -1 },
     { "http://foo/path with spaces 2", G_URI_FLAGS_ENCODED_PATH,
-      "/path%20with%20spaces%202" },
+      "/path%20with%20spaces%202", -1 },
     { "http://foo/%aa", G_URI_FLAGS_ENCODED,
-      "/%AA" },
+      "/%AA", -1 },
     { "http://foo/%☺", G_URI_FLAGS_ENCODED | G_URI_FLAGS_PARSE_RELAXED,
-      "/%%FFFFFFE2%FFFFFF98%FFFFFFBA" },
+      "/%%FFFFFFE2%FFFFFF98%FFFFFFBA", -1 },
+    { "http://foo", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "/", -1 },
+    { "nothttp://foo", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "", -1 },
+    { "http://foo:80", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "/", -1 },
+    { "ftp://foo:21", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "", -1 },
+    { "nothttp://foo:80", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "", 80 },
   };
 
 static void
@@ -1737,6 +1748,7 @@ test_uri_normalize (void)
                                NULL);
       g_assert_nonnull (uri);
       g_assert_cmpstr (g_uri_get_path (uri), ==, normalize_tests[i].path);
+      g_assert_cmpint (g_uri_get_port (uri), ==, normalize_tests[i].port);
     }
 }
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]