[gtksourceview/wip/regex-search] Regex search
- From: Sébastien Wilmet <swilmet src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gtksourceview/wip/regex-search] Regex search
- Date: Sat, 20 Jul 2013 12:49:57 +0000 (UTC)
commit 7aef068afa8ed1cea7785a62f585e5e1ea66e106
Author: Sébastien Wilmet <swilmet gnome org>
Date: Thu Jul 11 16:44:08 2013 +0200
Regex search
All features work (highlighting, forward/backward sync and async search,
replace, etc.). Remaining problems:
- There can be performance issues for corner cases, like a pattern that
matches the entire buffer.
- GRegex can report errors. We should at least report the error when the
pattern can not be compiled. There can also be errors while matching
(with a correct pattern), but I can not give an example. So it's
simpler to just print a warning in these cases, and continue the
search.
- To search at word boundaries, \b is added at the beginning and at the
end of the pattern. But \b is not the same as
gtk_text_iter_starts_word() and gtk_text_iter_ends_word(). \b for
example doesn't take the underscore as a word boundary.
Using gtk_text_iter_starts_word() and ends_word() for regex searches
is not easily possible: if the GRegex returns a match that doesn't
start and end a word, maybe a shorter match (for a greedy pattern)
starts and ends a word, or a longer match (for an ungreedy pattern). To
be able to use the gtk_text_iter_starts_word() and ends_word()
functions for regex search, g_regex_match_all_full() must be used, to
retrieve _all_ matches, and test the word boundaries until a match is
OK.
- Write unit tests.
docs/reference/gtksourceview-3.0-sections.txt | 2 +
gtksourceview/gtksourcebuffer.c | 72 +++-
gtksourceview/gtksourcebuffer.h | 6 +
gtksourceview/gtksourcesearch.c | 777 ++++++++++++++++++++++++-
gtksourceview/gtksourcesearch.h | 7 +
tests/test-search-ui.c | 9 +
tests/test-search-ui.ui | 17 +
7 files changed, 862 insertions(+), 28 deletions(-)
---
diff --git a/docs/reference/gtksourceview-3.0-sections.txt b/docs/reference/gtksourceview-3.0-sections.txt
index e04cebc..a6dfc7c 100644
--- a/docs/reference/gtksourceview-3.0-sections.txt
+++ b/docs/reference/gtksourceview-3.0-sections.txt
@@ -46,6 +46,8 @@ gtk_source_buffer_set_search_at_word_boundaries
gtk_source_buffer_get_search_at_word_boundaries
gtk_source_buffer_set_search_wrap_around
gtk_source_buffer_get_search_wrap_around
+gtk_source_buffer_set_regex_search
+gtk_source_buffer_get_regex_search
gtk_source_buffer_set_highlight_search
gtk_source_buffer_get_highlight_search
gtk_source_buffer_get_search_occurrences_count
diff --git a/gtksourceview/gtksourcebuffer.c b/gtksourceview/gtksourcebuffer.c
index 430cf71..f79c338 100644
--- a/gtksourceview/gtksourcebuffer.c
+++ b/gtksourceview/gtksourcebuffer.c
@@ -7,6 +7,7 @@
* Jeroen Zwartepoorte <jeroen xs4all nl>
* Copyright (C) 2003 - Paolo Maggi <paolo maggi polito it> and
* Gustavo Giráldez <gustavo giraldez gmx net>
+ * Copyright (C) 2013 - Sébastien Wilmet <swilmet gnome org>
*
* GtkSourceView is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -171,7 +172,8 @@ enum {
PROP_SEARCH_OCCURRENCES_COUNT,
PROP_CASE_SENSITIVE_SEARCH,
PROP_SEARCH_AT_WORD_BOUNDARIES,
- PROP_SEARCH_WRAP_AROUND
+ PROP_SEARCH_WRAP_AROUND,
+ PROP_REGEX_SEARCH
};
struct _GtkSourceBufferPrivate
@@ -483,6 +485,21 @@ gtk_source_buffer_class_init (GtkSourceBufferClass *klass)
TRUE,
G_PARAM_READWRITE | G_PARAM_CONSTRUCT));
+ /**
+ * GtkSourceBuffer:regex-search:
+ *
+ * Search by regular expression.
+ *
+ * Since: 3.10
+ */
+ g_object_class_install_property (object_class,
+ PROP_REGEX_SEARCH,
+ g_param_spec_boolean ("regex-search",
+ _("Regex search"),
+ _("Search by regular expression"),
+ FALSE,
+ G_PARAM_READWRITE | G_PARAM_CONSTRUCT));
+
param_types[0] = GTK_TYPE_TEXT_ITER | G_SIGNAL_TYPE_STATIC_SCOPE;
param_types[1] = GTK_TYPE_TEXT_ITER | G_SIGNAL_TYPE_STATIC_SCOPE;
@@ -737,6 +754,11 @@ gtk_source_buffer_set_property (GObject *object,
g_value_get_boolean (value));
break;
+ case PROP_REGEX_SEARCH:
+ _gtk_source_search_set_regex_enabled (source_buffer->priv->search,
+ g_value_get_boolean (value));
+ break;
+
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
break;
@@ -816,6 +838,10 @@ gtk_source_buffer_get_property (GObject *object,
g_value_set_boolean (value, _gtk_source_search_get_wrap_around
(source_buffer->priv->search));
break;
+ case PROP_REGEX_SEARCH:
+ g_value_set_boolean (value, _gtk_source_search_get_regex_enabled
(source_buffer->priv->search));
+ break;
+
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
break;
@@ -2877,6 +2903,50 @@ gtk_source_buffer_get_search_wrap_around (GtkSourceBuffer *buffer)
}
/**
+ * gtk_source_buffer_set_regex_search:
+ * @buffer: a #GtkSourceBuffer.
+ * @regex: the setting.
+ *
+ * Enables or disables the regular expression search.
+ *
+ * Since: 3.10
+ */
+void
+gtk_source_buffer_set_regex_search (GtkSourceBuffer *buffer,
+				    gboolean         regex)
+{
+	gboolean enabled;
+
+	g_return_if_fail (GTK_SOURCE_IS_BUFFER (buffer));
+
+	/* Canonicalize to TRUE/FALSE so the comparison below is reliable. */
+	enabled = regex ? TRUE : FALSE;
+
+	/* Unchanged setting: do not touch the search and do not notify. */
+	if (_gtk_source_search_get_regex_enabled (buffer->priv->search) == enabled)
+	{
+		return;
+	}
+
+	_gtk_source_search_set_regex_enabled (buffer->priv->search, enabled);
+	g_object_notify (G_OBJECT (buffer), "regex-search");
+}
+
+/**
+ * gtk_source_buffer_get_regex_search:
+ * @buffer: a #GtkSourceBuffer.
+ *
+ * Returns: whether the search is done by regular expression.
+ *
+ * Since: 3.10
+ */
+gboolean
+gtk_source_buffer_get_regex_search (GtkSourceBuffer *buffer)
+{
+	gboolean enabled;
+
+	g_return_val_if_fail (GTK_SOURCE_IS_BUFFER (buffer), FALSE);
+
+	/* The setting itself is stored by the internal search object. */
+	enabled = _gtk_source_search_get_regex_enabled (buffer->priv->search);
+
+	return enabled;
+}
+
+/**
* gtk_source_buffer_set_highlight_search:
* @buffer: a #GtkSourceBuffer.
* @highlight: the setting.
diff --git a/gtksourceview/gtksourcebuffer.h b/gtksourceview/gtksourcebuffer.h
index c95b1d2..144f414 100644
--- a/gtksourceview/gtksourcebuffer.h
+++ b/gtksourceview/gtksourcebuffer.h
@@ -6,6 +6,7 @@
* Chris Phelps <chicane reninet com> and
* Jeroen Zwartepoorte <jeroen xs4all nl>
* Copyright (C) 2003 - Paolo Maggi, Gustavo Giráldez
+ * Copyright (C) 2013 - Sébastien Wilmet <swilmet gnome org>
*
* GtkSourceView is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -200,6 +201,11 @@ void gtk_source_buffer_set_search_wrap_around
(GtkSourceBuffer *buffer,
gboolean gtk_source_buffer_get_search_wrap_around (GtkSourceBuffer
*buffer);
+void gtk_source_buffer_set_regex_search (GtkSourceBuffer
*buffer,
+ gboolean
regex);
+
+gboolean gtk_source_buffer_get_regex_search (GtkSourceBuffer
*buffer);
+
void gtk_source_buffer_set_highlight_search (GtkSourceBuffer
*buffer,
gboolean
highlight);
diff --git a/gtksourceview/gtksourcesearch.c b/gtksourceview/gtksourcesearch.c
index bcd2c76..54a78ef 100644
--- a/gtksourceview/gtksourcesearch.c
+++ b/gtksourceview/gtksourcesearch.c
@@ -22,8 +22,8 @@
#include "gtksourcesearch.h"
#include "gtksourcebuffer.h"
#include "gtksourcestylescheme.h"
-#include "gtktextregion.h"
#include "gtksourcestyle-private.h"
+#include "gtktextregion.h"
#include <string.h>
@@ -99,6 +99,59 @@
* - Rewrite the code to implement the simpler solution explained above :-)
*/
+/* Regex search:
+ *
+ * With a regex, we don't know how many lines a match can span. A regex will
+ * most probably match only one line, but a regex can contain something like
+ * "\n*", or the dot metacharacter can also match newlines, with the "?s" option
+ * (see G_REGEX_DOTALL).
+ * Therefore a simple solution is to always begin the search at the beginning of
+ * the document. Only the scan_region is taken into account for scanning the
+ * buffer.
+ *
+ * For non-regex searches, when there is an insertion or deletion in the buffer,
+ * we don't need to re-scan all the buffer. If there is an unmodified match in
+ * the neighborhood, no need to re-scan it. For a regex search, it is more
+ * complicated. An insertion or deletion outside a match can modify a match
+ * located in the neighborhood. Take for example the regex "(aa)+" with the
+ * buffer contents "aaa". There is one occurrence: the first two letters. If we
+ * insert an extra 'a' at the end of the buffer, the occurrence is modified to
+ * take the next two letters. That's why the buffer is re-scanned entirely on
+ * each insertion or deletion in the buffer.
+ *
+ * For searching the matches, the easiest solution is to retrieve all the buffer
+ * contents, and search the occurrences on this big string. But it takes a lot
+ * of memory space. It is better to do multi-segment matching, also called
+ * incremental matching. See the pcrepartial(3) manpage. The matching is done
+ * segment by segment, with the G_REGEX_MATCH_PARTIAL_HARD flag (for reasons
+ * explained in the manpage). We begin by the first segment of the buffer as the
+ * subject string. If a partial match is returned, we append the next segment to
+ * the subject string, and we try again to find a complete match. When a
+ * complete match is returned, we must continue to search the next occurrences.
+ * The max lookbehind of the pattern must be retrieved. The start of the next
+ * subject string is located at max_lookbehind characters before the end of the
+ * previously found match. Similarly, if no match is found (neither a complete
+ * match nor a partial match), we take the next segment, with the last
+ * max_lookbehind characters from the previous segment.
+ *
+ * TODO/idea:
+ * What we would like to support in applications is the incremental search:
+ * while we type the pattern, the buffer is scanned and the matches are
+ * highlighted. When the pattern is not fully typed, strange things can happen,
+ * including a pattern that matches the entire buffer. And if the user is
+ * working on a really big file, catastrophe: the UI is blocked!
+ * To avoid this problem, a solution is to search the buffer differently
+ * depending on the situation:
+ * - First situation: the subject string to scan is small enough, we retrieve it
+ * and scan it directly.
+ * - Second situation: the subject string to scan is too big, it will take
+ * too much time to retrieve it and scan it directly. We handle this situation
+ * in three phases: (1) retrieving the subject string, chunk by chunk, in
+ * several idle loop iterations. (2) Once the subject string is retrieved
+ * completely, we launch the regex matching in a thread. (3) Once the thread
+ * is finished, we highlight the matches in the buffer. And voilà.
+ */
+
/*
#define ENABLE_DEBUG
*/
@@ -142,9 +195,11 @@ struct _GtkSourceSearchPrivate
/* State of the search. If text is NULL, the search is disabled. */
gchar *text;
gint text_nb_lines;
+ GRegex *regex;
GtkTextSearchFlags flags;
guint at_word_boundaries : 1;
guint wrap_around : 1;
+ guint regex_enabled : 1;
guint highlight : 1;
};
@@ -391,6 +446,163 @@ clear_search (GtkSourceSearch *search)
search->priv->occurrences_count = 0;
}
+/* Compute where the subject string of a regex match must really begin.
+ *
+ * @real_start is set to @start moved backward by at most the maximum
+ * lookbehind of the compiled regex, so that lookbehind assertions can see the
+ * text located before @start.
+ *
+ * @start_pos is set to the position of @start inside the subject string that
+ * begins at @real_start. g_regex_match_full() expects that position in bytes,
+ * not in characters, so it is measured on the text itself: a plain character
+ * count is wrong as soon as the lookbehind text contains multi-byte UTF-8
+ * characters.
+ */
+static void
+regex_search_get_real_start (GtkSourceSearch   *search,
+			     const GtkTextIter *start,
+			     GtkTextIter       *real_start,
+			     gint              *start_pos)
+{
+	gint max_lookbehind = g_regex_get_max_lookbehind (search->priv->regex);
+	gint nb_chars;
+	gchar *lookbehind_text;
+
+	*real_start = *start;
+
+	for (nb_chars = 0; nb_chars < max_lookbehind; nb_chars++)
+	{
+		if (!gtk_text_iter_backward_char (real_start))
+		{
+			/* The iter could not be moved further backward. */
+			break;
+		}
+	}
+
+	/* Use the same text accessor as the callers use to build the subject
+	 * string, so that the byte offset is consistent with the string that is
+	 * actually matched.
+	 */
+	lookbehind_text = gtk_text_iter_get_visible_text (real_start, start);
+	*start_pos = strlen (lookbehind_text);
+	g_free (lookbehind_text);
+}
+
+/* Translate the current match of @match_info into the @match_start and
+ * @match_end iters.
+ *
+ * g_match_info_fetch_pos() reports byte positions inside @subject, whereas
+ * iters are moved by counting UTF-8 characters. Since one GMatchInfo can yield
+ * several matches (through g_match_info_next()), @iter and @iter_byte_pos act
+ * as a cursor: they remember the buffer location and the byte position where
+ * the previous call stopped, so that g_utf8_strlen() never has to restart from
+ * the beginning of @subject. On success both are advanced to the end of the
+ * match.
+ *
+ * Returns TRUE if a match has been fetched, FALSE otherwise.
+ */
+static gboolean
+regex_search_fetch_match (GMatchInfo  *match_info,
+			  const gchar *subject,
+			  gssize       subject_length,
+			  GtkTextIter *iter,
+			  gint        *iter_byte_pos,
+			  GtkTextIter *match_start,
+			  GtkTextIter *match_end)
+{
+	gint start_byte_pos;
+	gint end_byte_pos;
+	gint chars_before_match;
+	gint chars_in_match;
+
+	g_assert (*iter_byte_pos <= subject_length);
+	g_assert (match_start != NULL);
+	g_assert (match_end != NULL);
+
+	if (!g_match_info_matches (match_info))
+	{
+		return FALSE;
+	}
+
+	if (!g_match_info_fetch_pos (match_info, 0, &start_byte_pos, &end_byte_pos))
+	{
+		g_warning ("Impossible to fetch regex match position.");
+		return FALSE;
+	}
+
+	g_assert (start_byte_pos < subject_length);
+	g_assert (end_byte_pos <= subject_length);
+	g_assert (*iter_byte_pos <= start_byte_pos);
+	g_assert (start_byte_pos < end_byte_pos);
+
+	/* Convert both byte distances into character counts first... */
+	chars_before_match = g_utf8_strlen (subject + *iter_byte_pos,
+					    start_byte_pos - *iter_byte_pos);
+
+	chars_in_match = g_utf8_strlen (subject + start_byte_pos,
+					end_byte_pos - start_byte_pos);
+
+	/* ... then walk the iters: cursor -> match start -> match end. */
+	*match_start = *iter;
+	gtk_text_iter_forward_chars (match_start, chars_before_match);
+
+	*match_end = *match_start;
+	gtk_text_iter_forward_chars (match_end, chars_in_match);
+
+	/* Leave the cursor at the end of this match for the next call. */
+	*iter_byte_pos = end_byte_pos;
+	*iter = *match_end;
+
+	return TRUE;
+}
+
+/* Regex flavour of the basic forward search: look for the first match located
+ * between @start_at and @limit (or the end of the buffer if @limit is NULL).
+ * The subject string begins a bit before @start_at so that lookbehind
+ * assertions work, see regex_search_get_real_start().
+ *
+ * Returns TRUE if a match has been found and no matching error occurred.
+ */
+static gboolean
+basic_forward_regex_search (GtkSourceSearch   *search,
+			    const GtkTextIter *start_at,
+			    GtkTextIter       *match_start,
+			    GtkTextIter       *match_end,
+			    const GtkTextIter *limit)
+{
+	GtkTextIter subject_start;
+	GtkTextIter subject_end;
+	GtkTextIter cursor;
+	gint cursor_byte_pos = 0;
+	gint match_from;
+	gchar *subject;
+	gssize subject_length;
+	GRegexMatchFlags match_options = 0;
+	GMatchInfo *match_info;
+	GError *error = NULL;
+	gboolean found;
+
+	if (search->priv->regex == NULL)
+	{
+		return FALSE;
+	}
+
+	regex_search_get_real_start (search, start_at, &subject_start, &match_from);
+
+	if (limit != NULL)
+	{
+		subject_end = *limit;
+	}
+	else
+	{
+		gtk_text_buffer_get_end_iter (search->priv->buffer, &subject_end);
+	}
+
+	/* The subject is only a slice of the buffer: tell the regex engine when
+	 * its edges are not real line boundaries.
+	 */
+	if (!gtk_text_iter_starts_line (&subject_start))
+	{
+		match_options |= G_REGEX_MATCH_NOTBOL;
+	}
+
+	if (!gtk_text_iter_ends_line (&subject_end))
+	{
+		match_options |= G_REGEX_MATCH_NOTEOL;
+	}
+
+	subject = gtk_text_iter_get_visible_text (&subject_start, &subject_end);
+	subject_length = strlen (subject);
+
+	g_regex_match_full (search->priv->regex,
+			    subject,
+			    subject_length,
+			    match_from,
+			    match_options,
+			    &match_info,
+			    &error);
+
+	cursor = subject_start;
+
+	found = regex_search_fetch_match (match_info,
+					  subject,
+					  subject_length,
+					  &cursor,
+					  &cursor_byte_pos,
+					  match_start,
+					  match_end);
+
+	if (error != NULL)
+	{
+		g_warning ("Regex matching error: %s", error->message);
+		g_error_free (error);
+		found = FALSE;
+	}
+
+	g_match_info_free (match_info);
+	g_free (subject);
+
+	return found;
+}
+
static gboolean
basic_forward_search (GtkSourceSearch *search,
const GtkTextIter *iter,
@@ -405,6 +617,15 @@ basic_forward_search (GtkSourceSearch *search,
return FALSE;
}
+ if (search->priv->regex_enabled)
+ {
+ return basic_forward_regex_search (search,
+ iter,
+ match_start,
+ match_end,
+ limit);
+ }
+
while (TRUE)
{
gboolean found = gtk_text_iter_forward_search (&begin_search,
@@ -429,6 +650,102 @@ basic_forward_search (GtkSourceSearch *search,
}
}
+/* We fake the backward regex search by doing a forward search, and taking the
+ * last match.
+ *
+ * The subject string spans from @limit (or the start of the buffer if @limit
+ * is NULL), extended backward for lookbehind assertions, up to @start_at.
+ * @match_start and @match_end are only written when at least one match is
+ * fetched.
+ *
+ * Returns TRUE if a match has been found and no matching error occurred.
+ */
+static gboolean
+basic_backward_regex_search (GtkSourceSearch   *search,
+			     const GtkTextIter *start_at,
+			     GtkTextIter       *match_start,
+			     GtkTextIter       *match_end,
+			     const GtkTextIter *limit)
+{
+	GtkTextIter start;
+	GtkTextIter real_start;
+	GtkTextIter end;
+	gint start_pos;
+	gchar *subject;
+	gssize subject_length;
+	GRegexMatchFlags match_options = 0;
+	GMatchInfo *match_info;
+	GError *error = NULL;
+	GtkTextIter iter;
+	gint iter_byte_pos;
+	/* Must start at FALSE: the loop below never runs when nothing matches,
+	 * and the value is returned as is in that case.
+	 */
+	gboolean found = FALSE;
+	GtkTextIter tmp_match_start;
+	GtkTextIter tmp_match_end;
+
+	if (search->priv->regex == NULL)
+	{
+		return FALSE;
+	}
+
+	if (limit == NULL)
+	{
+		gtk_text_buffer_get_start_iter (search->priv->buffer, &start);
+	}
+	else
+	{
+		start = *limit;
+	}
+
+	regex_search_get_real_start (search, &start, &real_start, &start_pos);
+
+	end = *start_at;
+
+	/* The subject is only a slice of the buffer: tell the regex engine when
+	 * its edges are not real line boundaries.
+	 */
+	if (!gtk_text_iter_starts_line (&real_start))
+	{
+		match_options |= G_REGEX_MATCH_NOTBOL;
+	}
+
+	if (!gtk_text_iter_ends_line (&end))
+	{
+		match_options |= G_REGEX_MATCH_NOTEOL;
+	}
+
+	subject = gtk_text_iter_get_visible_text (&real_start, &end);
+	subject_length = strlen (subject);
+
+	g_regex_match_full (search->priv->regex,
+			    subject,
+			    subject_length,
+			    start_pos,
+			    match_options,
+			    &match_info,
+			    &error);
+
+	iter = real_start;
+	iter_byte_pos = 0;
+
+	/* Walk through all the matches; the last one fetched is the closest to
+	 * @start_at, i.e. the result of the backward search.
+	 */
+	while (regex_search_fetch_match (match_info,
+					 subject,
+					 subject_length,
+					 &iter,
+					 &iter_byte_pos,
+					 &tmp_match_start,
+					 &tmp_match_end))
+	{
+		found = TRUE;
+
+		*match_start = tmp_match_start;
+		*match_end = tmp_match_end;
+
+		g_match_info_next (match_info, &error);
+	}
+
+	if (error != NULL)
+	{
+		g_warning ("Regex matching error: %s", error->message);
+		g_error_free (error);
+		found = FALSE;
+	}
+
+	g_free (subject);
+	g_match_info_free (match_info);
+
+	return found;
+}
+
static gboolean
basic_backward_search (GtkSourceSearch *search,
const GtkTextIter *iter,
@@ -443,6 +760,15 @@ basic_backward_search (GtkSourceSearch *search,
return FALSE;
}
+ if (search->priv->regex_enabled)
+ {
+ return basic_backward_regex_search (search,
+ iter,
+ match_start,
+ match_end,
+ limit);
+ }
+
while (TRUE)
{
gboolean found = gtk_text_iter_backward_search (&begin_search,
@@ -1175,20 +1501,11 @@ scan_region_backward (GtkSourceSearch *search,
}
static void
-scan_task_region (GtkSourceSearch *search)
+resume_task (GtkSourceSearch *search)
{
ForwardBackwardData *task_data = g_task_get_task_data (search->priv->task);
GtkTextIter start_at;
- if (task_data->is_forward)
- {
- scan_region_forward (search, search->priv->task_region);
- }
- else
- {
- scan_region_backward (search, search->priv->task_region);
- }
-
if (search->priv->task_region != NULL)
{
gtk_text_region_destroy (search->priv->task_region, TRUE);
@@ -1213,8 +1530,25 @@ scan_task_region (GtkSourceSearch *search)
}
}
+/* Scan the task region in the direction requested by the pending task, then
+ * hand over to resume_task().
+ */
+static void
+scan_task_region (GtkSourceSearch *search)
+{
+	ForwardBackwardData *data = g_task_get_task_data (search->priv->task);
+	GtkTextRegion *region = search->priv->task_region;
+
+	if (!data->is_forward)
+	{
+		scan_region_backward (search, region);
+	}
+	else
+	{
+		scan_region_forward (search, region);
+	}
+
+	resume_task (search);
+}
+
static gboolean
-idle_scan_cb (GtkSourceSearch *search)
+idle_scan_normal_search (GtkSourceSearch *search)
{
if (search->priv->high_priority_region != NULL)
{
@@ -1256,6 +1590,281 @@ idle_scan_cb (GtkSourceSearch *search)
return G_SOURCE_CONTINUE;
}
+/* Just remove the found_tag's located in the high-priority region. For big
+ * documents, if the pattern is modified, it can take some time to re-scan all
+ * the buffer, so it's better to clear the highlighting as soon as possible. If
+ * the highlighting is not cleared, the user can wrongly think that the new
+ * pattern matches the old occurrences.
+ * The drawback of clearing the highlighting is that for small documents, there
+ * is some flickering.
+ *
+ * Only the part of the high-priority region that still has to be scanned (its
+ * intersection with the scan_region) is cleared.
+ */
+static void
+regex_search_handle_high_priority_region (GtkSourceSearch *search)
+{
+	GtkTextIter start;
+	GtkTextIter end;
+	GtkTextRegion *region;
+	GtkTextRegionIterator region_iter;
+	gint nb_subregions = gtk_text_region_subregions (search->priv->high_priority_region);
+
+	if (nb_subregions == 0)
+	{
+		return;
+	}
+
+	/* Bounds of the whole high-priority region: start of the first
+	 * subregion, end of the last one.
+	 */
+	gtk_text_region_nth_subregion (search->priv->high_priority_region,
+				       0,
+				       &start,
+				       NULL);
+
+	gtk_text_region_nth_subregion (search->priv->high_priority_region,
+				       nb_subregions - 1,
+				       NULL,
+				       &end);
+
+	region = gtk_text_region_intersect (search->priv->scan_region,
+					    &start,
+					    &end);
+
+	/* NOTE(review): gtk_text_region_intersect() presumably returns NULL when
+	 * nothing overlaps -- confirm in gtktextregion.c. There is nothing to
+	 * clear in that case.
+	 */
+	if (region == NULL)
+	{
+		return;
+	}
+
+	gtk_text_region_get_iterator (region, &region_iter, 0);
+
+	while (!gtk_text_region_iterator_is_end (&region_iter))
+	{
+		GtkTextIter subregion_start;
+		GtkTextIter subregion_end;
+
+		gtk_text_region_iterator_get_subregion (&region_iter,
+							&subregion_start,
+							&subregion_end);
+
+		gtk_text_buffer_remove_tag (search->priv->buffer,
+					    search->priv->found_tag,
+					    &subregion_start,
+					    &subregion_end);
+
+		gtk_text_region_iterator_next (&region_iter);
+	}
+
+	gtk_text_region_destroy (region, TRUE);
+}
+
+/* Scan one segment of the buffer: remove the found_tag between @segment_start
+ * and @segment_end, then tag and count every complete match found in the
+ * segment.
+ *
+ * @stopped_at is always set: to @segment_end when the segment is finished, or,
+ * on partial match, to the location where the fetching of complete matches
+ * stopped.
+ *
+ * Returns TRUE if the segment is finished, and FALSE on partial match.
+ */
+static gboolean
+regex_search_scan_segment (GtkSourceSearch *search,
+ const GtkTextIter *segment_start,
+ const GtkTextIter *segment_end,
+ GtkTextIter *stopped_at)
+{
+ GtkTextIter real_start;
+ gint start_pos;
+ gchar *subject;
+ gssize subject_length;
+ GRegexMatchFlags match_options = 0;
+ GMatchInfo *match_info;
+ GError *error = NULL;
+ GtkTextIter iter;
+ gint iter_byte_pos;
+ gboolean segment_finished;
+ GtkTextIter match_start;
+ GtkTextIter match_end;
+
+ g_assert (stopped_at != NULL);
+
+ /* Drop the old highlighting of the segment before matching it again. */
+ gtk_text_buffer_remove_tag (search->priv->buffer,
+ search->priv->found_tag,
+ segment_start,
+ segment_end);
+
+ /* Without a compiled regex nothing can match: the segment is done. */
+ if (search->priv->regex == NULL)
+ {
+ *stopped_at = *segment_end;
+ return TRUE;
+ }
+
+ /* The subject begins before @segment_start, for lookbehind assertions. */
+ regex_search_get_real_start (search,
+ segment_start,
+ &real_start,
+ &start_pos);
+
+ /* The subject is only a slice of the buffer: flag the edges that are not
+ * real line boundaries.
+ */
+ if (!gtk_text_iter_starts_line (&real_start))
+ {
+ match_options |= G_REGEX_MATCH_NOTBOL;
+ }
+
+ if (!gtk_text_iter_ends_line (segment_end))
+ {
+ match_options |= G_REGEX_MATCH_NOTEOL;
+ }
+
+ /* Partial matching is only requested when the segment does not reach the
+ * end of the buffer, i.e. when more text could be appended to the subject.
+ */
+ if (!gtk_text_iter_is_end (segment_end))
+ {
+ match_options |= G_REGEX_MATCH_PARTIAL_HARD;
+ }
+
+ subject = gtk_text_iter_get_visible_text (&real_start, segment_end);
+ subject_length = strlen (subject);
+
+ g_regex_match_full (search->priv->regex,
+ subject,
+ subject_length,
+ start_pos,
+ match_options,
+ &match_info,
+ &error);
+
+ /* Cursor used by regex_search_fetch_match() to convert byte positions
+ * into iters; it starts at the beginning of the subject.
+ */
+ iter = real_start;
+ iter_byte_pos = 0;
+
+ /* Highlight and count every complete match. */
+ while (regex_search_fetch_match (match_info,
+ subject,
+ subject_length,
+ &iter,
+ &iter_byte_pos,
+ &match_start,
+ &match_end))
+ {
+ gtk_text_buffer_apply_tag (search->priv->buffer,
+ search->priv->found_tag,
+ &match_start,
+ &match_end);
+
+ search->priv->occurrences_count++;
+
+ g_match_info_next (match_info, &error);
+ }
+
+ /* A matching error is only reported; the scan goes on. */
+ if (error != NULL)
+ {
+ g_warning ("Regex matching error: %s", error->message);
+ g_error_free (error);
+ }
+
+ if (g_match_info_is_partial_match (match_info))
+ {
+ /* @iter is the end of the last complete match, or the start of the
+ * subject if there was none.
+ */
+ *stopped_at = iter;
+ segment_finished = FALSE;
+ }
+ else
+ {
+ *stopped_at = *segment_end;
+ segment_finished = TRUE;
+ }
+
+ g_free (subject);
+ g_match_info_free (match_info);
+
+ return segment_finished;
+}
+
+/* Scan the buffer from @chunk_start until @chunk_end is reached or passed,
+ * one segment at a time. A segment initially ends one line further; each time
+ * the scan of a segment ends on a partial match, the segment end is pushed
+ * further (by 1, 2, 4, ... lines) and the scan is retried from where it
+ * stopped. The scanned range is finally removed from the scan_region and, if
+ * there is one, from the task_region.
+ */
+static void
+regex_search_scan_chunk (GtkSourceSearch   *search,
+			 const GtkTextIter *chunk_start,
+			 const GtkTextIter *chunk_end)
+{
+	GtkTextIter cursor = *chunk_start;
+
+	if (search->priv->found_tag == NULL)
+	{
+		init_found_tag (search);
+	}
+
+	while (gtk_text_iter_compare (&cursor, chunk_end) < 0)
+	{
+		GtkTextIter segment_end = cursor;
+		GtkTextIter stopped_at;
+		gint nb_lines = 1;
+
+		gtk_text_iter_forward_line (&segment_end);
+
+		for (;;)
+		{
+			gboolean finished;
+
+			finished = regex_search_scan_segment (search,
+							      &cursor,
+							      &segment_end,
+							      &stopped_at);
+
+			if (finished)
+			{
+				break;
+			}
+
+			/* Partial match: retry with a longer segment. */
+			cursor = stopped_at;
+			gtk_text_iter_forward_lines (&segment_end, nb_lines);
+			nb_lines <<= 1;
+		}
+
+		cursor = stopped_at;
+	}
+
+	gtk_text_region_subtract (search->priv->scan_region, chunk_start, &cursor);
+
+	if (search->priv->task_region != NULL)
+	{
+		gtk_text_region_subtract (search->priv->task_region, chunk_start, &cursor);
+	}
+}
+
+/* Scan the next chunk of the scan_region: SCAN_BATCH_SIZE lines, beginning at
+ * the start of the first subregion that remains to be scanned. Does nothing if
+ * the scan_region is empty.
+ */
+static void
+regex_search_scan_next_chunk (GtkSourceSearch *search)
+{
+	GtkTextIter from;
+	GtkTextIter to;
+
+	if (!is_text_region_empty (search->priv->scan_region))
+	{
+		gtk_text_region_nth_subregion (search->priv->scan_region, 0, &from, NULL);
+
+		to = from;
+		gtk_text_iter_forward_lines (&to, SCAN_BATCH_SIZE);
+
+		regex_search_scan_chunk (search, &from, &to);
+	}
+}
+
+/* One step of the scan for a regex search. In order of priority:
+ * 1. clear the highlighting of the high-priority region, if any;
+ * 2. otherwise scan the next chunk, and resume the pending task once its
+ *    region has been entirely scanned;
+ * 3. when the scan_region is empty, notify the occurrences count and stop.
+ *
+ * Returns G_SOURCE_REMOVE when the scan is finished, G_SOURCE_CONTINUE
+ * otherwise.
+ */
+static gboolean
+idle_scan_regex_search (GtkSourceSearch *search)
+{
+	if (search->priv->high_priority_region != NULL)
+	{
+		regex_search_handle_high_priority_region (search);
+
+		gtk_text_region_destroy (search->priv->high_priority_region, TRUE);
+		search->priv->high_priority_region = NULL;
+
+		return G_SOURCE_CONTINUE;
+	}
+
+	regex_search_scan_next_chunk (search);
+
+	if (search->priv->task != NULL &&
+	    is_text_region_empty (search->priv->task_region))
+	{
+		resume_task (search);
+		return G_SOURCE_CONTINUE;
+	}
+
+	/* Still something to scan: wait for the next step. */
+	if (!is_text_region_empty (search->priv->scan_region))
+	{
+		return G_SOURCE_CONTINUE;
+	}
+
+	/* Everything has been scanned. */
+	search->priv->idle_scan_id = 0;
+
+	g_object_notify (G_OBJECT (search->priv->buffer), "search-occurrences-count");
+
+	if (search->priv->scan_region != NULL)
+	{
+		gtk_text_region_destroy (search->priv->scan_region, TRUE);
+		search->priv->scan_region = NULL;
+	}
+
+	return G_SOURCE_REMOVE;
+}
+
+/* Run one scan step with the scanner that matches the current search mode
+ * (regex or normal), and return its result.
+ */
+static gboolean
+idle_scan_cb (GtkSourceSearch *search)
+{
+	if (search->priv->regex_enabled)
+	{
+		return idle_scan_regex_search (search);
+	}
+
+	return idle_scan_normal_search (search);
+}
+
static void
install_idle_scan (GtkSourceSearch *search)
{
@@ -1458,6 +2067,51 @@ add_subregion_to_scan (GtkSourceSearch *search,
}
static void
+update_regex (GtkSourceSearch *search)
+{
+	/* Rebuild search->priv->regex from the current search settings.
+	 *
+	 * The previous regex is always dropped. A new one is compiled only if
+	 * the regex search is enabled and there is a search text; if the
+	 * compilation fails, a warning is printed with the error message.
+	 */
+	GRegexCompileFlags compile_flags = G_REGEX_OPTIMIZE | G_REGEX_MULTILINE;
+	gchar *bounded_pattern = NULL;
+	const gchar *pattern;
+	GError *error = NULL;
+
+	if (search->priv->regex != NULL)
+	{
+		g_regex_unref (search->priv->regex);
+		search->priv->regex = NULL;
+	}
+
+	if (!search->priv->regex_enabled || search->priv->text == NULL)
+	{
+		return;
+	}
+
+	search->priv->text_nb_lines = 0;
+
+	if (search->priv->flags & GTK_TEXT_SEARCH_CASE_INSENSITIVE)
+	{
+		compile_flags |= G_REGEX_CASELESS;
+	}
+
+	if (search->priv->at_word_boundaries)
+	{
+		/* The user pattern is wrapped in a non-capturing group, so
+		 * that both \b apply to the whole pattern. Without the group,
+		 * "foo|bar" would become "\bfoo|bar\b", where each \b applies
+		 * to a single alternative only.
+		 */
+		bounded_pattern = g_strdup_printf ("\\b(?:%s)\\b", search->priv->text);
+	}
+
+	pattern = bounded_pattern != NULL ? bounded_pattern : search->priv->text;
+
+	search->priv->regex = g_regex_new (pattern,
+					   compile_flags,
+					   G_REGEX_MATCH_NOTEMPTY,
+					   &error);
+
+	if (error != NULL)
+	{
+		g_warning ("Error with the regex: %s", error->message);
+		g_error_free (error);
+	}
+
+	/* NULL when the search text was used directly; g_free() accepts it. */
+	g_free (bounded_pattern);
+}
+
+static void
update (GtkSourceSearch *search)
{
GtkTextIter start;
@@ -1484,7 +2138,8 @@ insert_text_before_cb (GtkSourceSearch *search,
{
clear_task (search);
- if (search->priv->text != NULL)
+ if (!search->priv->regex_enabled &&
+ search->priv->text != NULL)
{
GtkTextIter start = *location;
GtkTextIter end = *location;
@@ -1500,15 +2155,22 @@ insert_text_after_cb (GtkSourceSearch *search,
gchar *text,
gint length)
{
- GtkTextIter start;
- GtkTextIter end;
+ if (search->priv->regex_enabled)
+ {
+ update (search);
+ }
+ else
+ {
+ GtkTextIter start;
+ GtkTextIter end;
- start = end = *location;
+ start = end = *location;
- gtk_text_iter_backward_chars (&start,
- g_utf8_strlen (text, length));
+ gtk_text_iter_backward_chars (&start,
+ g_utf8_strlen (text, length));
- add_subregion_to_scan (search, &start, &end);
+ add_subregion_to_scan (search, &start, &end);
+ }
}
static void
@@ -1521,6 +2183,11 @@ delete_range_before_cb (GtkSourceSearch *search,
clear_task (search);
+ if (search->priv->regex_enabled)
+ {
+ return;
+ }
+
gtk_text_buffer_get_bounds (search->priv->buffer, &start_buffer, &end_buffer);
if (gtk_text_iter_equal (delete_start, &start_buffer) &&
@@ -1549,7 +2216,14 @@ delete_range_after_cb (GtkSourceSearch *search,
GtkTextIter *start,
GtkTextIter *end)
{
- add_subregion_to_scan (search, start, end);
+ if (search->priv->regex_enabled)
+ {
+ update (search);
+ }
+ else
+ {
+ add_subregion_to_scan (search, start, end);
+ }
}
static void
@@ -1604,6 +2278,11 @@ _gtk_source_search_finalize (GObject *object)
g_free (search->priv->text);
+ if (search->priv->regex != NULL)
+ {
+ g_regex_unref (search->priv->regex);
+ }
+
G_OBJECT_CLASS (_gtk_source_search_parent_class)->finalize (object);
}
@@ -1689,8 +2368,16 @@ _gtk_source_search_set_text (GtkSourceSearch *search,
search->priv->text = g_strdup (text);
}
- search->priv->text_nb_lines = compute_number_of_lines (search->priv->text);
+ if (search->priv->regex_enabled)
+ {
+ search->priv->text_nb_lines = 0;
+ }
+ else
+ {
+ search->priv->text_nb_lines = compute_number_of_lines (search->priv->text);
+ }
+ update_regex (search);
update (search);
}
@@ -1717,6 +2404,7 @@ _gtk_source_search_set_case_sensitive (GtkSourceSearch *search,
search->priv->flags |= GTK_TEXT_SEARCH_CASE_INSENSITIVE;
}
+ update_regex (search);
update (search);
}
@@ -1735,6 +2423,8 @@ _gtk_source_search_set_at_word_boundaries (GtkSourceSearch *search,
g_return_if_fail (GTK_SOURCE_IS_SEARCH (search));
search->priv->at_word_boundaries = at_word_boundaries;
+
+ update_regex (search);
update (search);
}
@@ -1765,6 +2455,26 @@ _gtk_source_search_get_wrap_around (GtkSourceSearch *search)
}
void
+_gtk_source_search_set_regex_enabled (GtkSourceSearch *search,
+				      gboolean         regex_enabled)
+{
+	/* Enable or disable the regex search, then rebuild the regex and
+	 * update the search accordingly.
+	 */
+	g_return_if_fail (GTK_SOURCE_IS_SEARCH (search));
+
+	/* regex_enabled is stored in a 1-bit field: canonicalize the value
+	 * first, otherwise an even non-zero gboolean (e.g. 2) would be
+	 * truncated to 0.
+	 */
+	search->priv->regex_enabled = regex_enabled != FALSE;
+
+	update_regex (search);
+	update (search);
+}
+
+/* Returns whether the regex search is enabled (FALSE if @search is invalid). */
+gboolean
+_gtk_source_search_get_regex_enabled (GtkSourceSearch *search)
+{
+	gboolean enabled;
+
+	g_return_val_if_fail (GTK_SOURCE_IS_SEARCH (search), FALSE);
+
+	enabled = search->priv->regex_enabled;
+
+	return enabled;
+}
+
+void
_gtk_source_search_set_highlight (GtkSourceSearch *search,
gboolean highlight)
{
@@ -1897,12 +2607,7 @@ _gtk_source_search_update_highlight (GtkSourceSearch *search,
return;
}
- if (synchronous)
- {
- scan_all_region (search, region_to_highlight);
- gtk_text_region_destroy (region_to_highlight, TRUE);
- }
- else
+ if (!synchronous)
{
if (search->priv->high_priority_region != NULL)
{
@@ -1916,6 +2621,24 @@ _gtk_source_search_update_highlight (GtkSourceSearch *search,
search->priv->high_priority_region = region_to_highlight;
install_idle_scan (search);
+ return;
+ }
+
+ if (search->priv->regex_enabled)
+ {
+ GtkTextIter start;
+
+ gtk_text_region_nth_subregion (search->priv->scan_region,
+ 0,
+ &start,
+ NULL);
+
+ regex_search_scan_chunk (search, &start, end);
+ }
+ else
+ {
+ scan_all_region (search, region_to_highlight);
+ gtk_text_region_destroy (region_to_highlight, TRUE);
}
}
diff --git a/gtksourceview/gtksourcesearch.h b/gtksourceview/gtksourcesearch.h
index d8b10d0..fc7ed5d 100644
--- a/gtksourceview/gtksourcesearch.h
+++ b/gtksourceview/gtksourcesearch.h
@@ -86,6 +86,13 @@ G_GNUC_INTERNAL
gboolean _gtk_source_search_get_wrap_around (GtkSourceSearch *search);
G_GNUC_INTERNAL
+void _gtk_source_search_set_regex_enabled (GtkSourceSearch *search,
+ gboolean
regex_enabled);
+
+G_GNUC_INTERNAL
+gboolean _gtk_source_search_get_regex_enabled (GtkSourceSearch *search);
+
+G_GNUC_INTERNAL
void _gtk_source_search_set_highlight (GtkSourceSearch *search,
gboolean highlight);
diff --git a/tests/test-search-ui.c b/tests/test-search-ui.c
index 86f3b66..b0151d6 100644
--- a/tests/test-search-ui.c
+++ b/tests/test-search-ui.c
@@ -343,6 +343,14 @@ wrap_around_toggled_cb (TestSearchUI *search,
}
static void
+regex_toggled_cb (TestSearchUI    *search,
+		  GtkToggleButton *button)
+{
+	/* Mirror the state of the toggle button in the buffer's regex search
+	 * setting.
+	 */
+	gboolean active = gtk_toggle_button_get_active (button);
+
+	gtk_source_buffer_set_regex_search (search->priv->source_buffer, active);
+}
+
+static void
test_search_ui_dispose (GObject *object)
{
TestSearchUI *search = TEST_SEARCH_UI (object);
@@ -381,6 +389,7 @@ test_search_ui_class_init (TestSearchUIClass *klass)
gtk_widget_class_bind_callback (widget_class, match_case_toggled_cb);
gtk_widget_class_bind_callback (widget_class, at_word_boundaries_toggled_cb);
gtk_widget_class_bind_callback (widget_class, wrap_around_toggled_cb);
+ gtk_widget_class_bind_callback (widget_class, regex_toggled_cb);
}
static void
diff --git a/tests/test-search-ui.ui b/tests/test-search-ui.ui
index 7da2630..9f2a39e 100644
--- a/tests/test-search-ui.ui
+++ b/tests/test-search-ui.ui
@@ -218,6 +218,23 @@
<property name="height">1</property>
</packing>
</child>
+ <child>
+ <object class="GtkCheckButton" id="checkbutton_regex">
+ <property name="label">Regex</property>
+ <property name="visible">True</property>
+ <property name="can_focus">True</property>
+ <property name="receives_default">False</property>
+ <property name="xalign">0</property>
+ <property name="draw_indicator">True</property>
+ <signal name="toggled" handler="regex_toggled_cb" object="TestSearchUI" swapped="yes"/>
+ </object>
+ <packing>
+ <property name="left_attach">0</property>
+ <property name="top_attach">4</property>
+ <property name="width">1</property>
+ <property name="height">1</property>
+ </packing>
+ </child>
</object>
<packing>
<property name="left_attach">0</property>
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]