[glib/wip/pcre-mark: 1/2] regex: Add g_match_info_get_mark
- From: Christian Persch <chpe src gnome org>
- To: commits-list gnome org
- Cc: 
- Subject: [glib/wip/pcre-mark: 1/2] regex: Add g_match_info_get_mark
- Date: Mon,  2 Jul 2012 14:17:14 +0000 (UTC)
commit d18c3010156bdccebdf18f09aaaac334c0b5d391
Author: Christian Persch <chpe gnome org>
Date:   Fri Jun 8 00:49:00 2012 +0200
    regex: Add g_match_info_get_mark
    
    Since PCRE 8.03, PCRE supports backtracking control verbs with a name argument.
    g_match_info_get_mark() will return the argument of the last encountered verb
    in the whole matching process for failed or partial matches, and in the matching
    path only for matches.
 docs/reference/glib/glib-sections.txt |    1 +
 glib/gregex.c                         |   40 +++++++++++++++++++++++++-
 glib/gregex.h                         |    2 +
 glib/tests/regex.c                    |   50 +++++++++++++++++++++++++++++++++
 4 files changed, 91 insertions(+), 2 deletions(-)
---
diff --git a/docs/reference/glib/glib-sections.txt b/docs/reference/glib/glib-sections.txt
index d2e985e..02652a7 100644
--- a/docs/reference/glib/glib-sections.txt
+++ b/docs/reference/glib/glib-sections.txt
@@ -1026,6 +1026,7 @@ g_regex_check_replacement
 GMatchInfo
 g_match_info_get_regex
 g_match_info_get_string
+g_match_info_get_mark
 g_match_info_ref
 g_match_info_unref
 g_match_info_free
diff --git a/glib/gregex.c b/glib/gregex.c
index 839b1e6..e69b2c0 100644
--- a/glib/gregex.c
+++ b/glib/gregex.c
@@ -214,6 +214,8 @@ struct _GMatchInfo
   gint n_workspace;             /* number of workspace elements */
   const gchar *string;          /* string passed to the match function */
   gssize string_len;            /* length of string */
+  /* const */ guchar *mark;     /* MARK when using backtracking control */
+  pcre_extra extra;             /* pcre_extra data */
 };
 
 struct _GRegex
@@ -592,6 +594,20 @@ match_info_new (const GRegex *regex,
   match_info->offsets[0] = -1;
   match_info->offsets[1] = -1;
 
+  if (!is_dfa)
+    {
+      /* We need a pcre_extra to store a pointer to GMatchInfo::mark
+       * where pcre_exec will store the MARK.
+       * Since pcre_exec does not modify the extra data otherwise,
+       * it should be safe to do a shallow copy here.
+       */
+      if (regex->extra)
+        match_info->extra = *regex->extra;
+
+      match_info->extra.flags |= PCRE_EXTRA_MARK;
+      match_info->extra.mark = &match_info->mark;
+    }
+
   return match_info;
 }
 
@@ -634,6 +650,27 @@ g_match_info_get_string (const GMatchInfo *match_info)
 }
 
 /**
+ * g_match_info_get_mark:
+ * @match_info: a #GMatchInfo structure
+ *
+ * When the pattern contains backtracking control verbs, and there is
+ * a match, returns the argument of the verb last encountered on the
+ * matching path. If there is a partial match, or no match, returns
+ * the argument of the last verb encountered in the whole matching
+ * process. Otherwise, %NULL is returned.
+ *
+ * Returns: (transfer none): the mark, or %NULL
+ *
+ * Since: 2.34
+ */
+const gchar *
+g_match_info_get_mark (const GMatchInfo *match_info)
+{
+  g_return_val_if_fail (match_info != NULL, NULL);
+  return (const gchar *) match_info->mark;
+}
+
+/**
  * g_match_info_ref:
  * @match_info: a #GMatchInfo
  *
@@ -729,7 +766,7 @@ g_match_info_next (GMatchInfo  *match_info,
     }
 
   match_info->matches = pcre_exec (match_info->regex->pcre_re,
-                                   match_info->regex->extra,
+                                   &match_info->extra,
                                    match_info->string,
                                    match_info->string_len,
                                    match_info->pos,
@@ -1223,7 +1260,6 @@ g_match_info_fetch_all (const GMatchInfo *match_info)
   return result;
 }
 
-
 /* GRegex */
 
 GQuark
diff --git a/glib/gregex.h b/glib/gregex.h
index 29e5c6a..6944406 100644
--- a/glib/gregex.h
+++ b/glib/gregex.h
@@ -527,6 +527,8 @@ gboolean	  g_regex_check_replacement	(const gchar         *replacement,
 /* Match info */
 GRegex		 *g_match_info_get_regex	(const GMatchInfo    *match_info);
 const gchar      *g_match_info_get_string       (const GMatchInfo    *match_info);
+const gchar      *g_match_info_get_mark         (const GMatchInfo    *match_info);
+
 
 GMatchInfo       *g_match_info_ref              (GMatchInfo          *match_info);
 void              g_match_info_unref            (GMatchInfo          *match_info);
diff --git a/glib/tests/regex.c b/glib/tests/regex.c
index 72a0155..ed5ab80 100644
--- a/glib/tests/regex.c
+++ b/glib/tests/regex.c
@@ -2048,6 +2048,48 @@ test_explicit_crlf (void)
   g_regex_unref (regex);
 }
 
+
+typedef struct {
+  const gchar *pattern;
+  const gchar *string;
+  const gchar *mark;
+  gboolean expected;
+} TestMarkData;
+
+static void
+test_mark (gconstpointer d)
+{
+  const TestMarkData *data = d;
+  GRegex *regex;
+  GMatchInfo *info;
+  gboolean match;
+  GError *error = NULL;
+
+  regex = g_regex_new (data->pattern, 0, 0, &error);
+  g_assert_no_error (error);
+
+  match = g_regex_match_full (regex, data->string, -1, 0, 0, &info, NULL);
+  g_assert_cmpint (match, ==, data->expected);
+  g_assert_cmpstr (g_match_info_get_mark (info), ==, data->mark);
+
+  g_match_info_free (info);
+  g_regex_unref (regex);
+}
+
+#define TEST_MARK(_pattern, _string, _expected, _mark) \
+{ \
+  TestMarkData *data; \
+  gchar *path; \
+  data = g_new0 (TestMarkData, 1); \
+  data->pattern = _pattern; \
+  data->string = _string; \
+  data->mark = _mark; \
+  data->expected = _expected; \
+  path = g_strdup_printf ("/regex/mark/%d", ++total); \
+  g_test_add_data_func (path, data, test_mark); \
+  g_free (path); \
+}
+
 int
 main (int argc, char *argv[])
 {
@@ -2703,5 +2745,13 @@ main (int argc, char *argv[])
   TEST_MATCH_NOTEMPTY("a?b?", "xyz", FALSE);
   TEST_MATCH_NOTEMPTY_ATSTART("a?b?", "xyz", TRUE);
 
+  /* MARK */
+  TEST_MARK("^(A(*PRUNE:A)B|C(*PRUNE:B)D)", "AC", FALSE, "A");
+  TEST_MARK("^(A(*PRUNE:A)B|C(*PRUNE:B)D)", "CB", FALSE, "B");
+  TEST_MARK("(*MARK:A)(*SKIP:B)(C|X)", "C", TRUE, "A");
+  TEST_MARK("(*MARK:A)(*SKIP:B)(C|X)", "D", FALSE, "A");
+  TEST_MARK("X(*MARK:A)Y|X(*MARK:B)Z", "XY", TRUE, "A");
+  TEST_MARK("X(*MARK:A)Y|X(*MARK:B)Z", "XZ", TRUE, "B");
+
   return g_test_run ();
 }
[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]