[evince/333-handle-spaces-and-hyphenation-when-search-pdf: 91/91] Add support for text search across lines
- From: Germán Poo-Caamaño <gpoo src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [evince/333-handle-spaces-and-hyphenation-when-search-pdf: 91/91] Add support for text search across lines
- Date: Sun, 24 Nov 2019 03:08:49 +0000 (UTC)
commit d2d6415c8388f60fd38a828ae41d2331bed10a0a
Author: Nelson Benítez León <nbenitezl gmail com>
Date: Thu Mar 21 22:03:33 2019 -0400
Add support for text search across lines
Implemented in poppler MR:
https://gitlab.freedesktop.org/poppler/poppler/merge_requests/267
As the new poppler result data type is PopplerFindRectangle, which
incorporates more fields than just coordinates, we create a
corresponding type in Evince, called EvFindRectangle, and use it
all across Evince, including the PDF backend (ev-poppler.cc) and the
DjVu backend (djvu-document.c), which are the only backends that
implement the text search interface.
This new feature has the following aspects:
- Ignores a hyphen character while matching when 1) it is the
last character of the line and 2) its corresponding matching
character in the search term is not a hyphen as well.
- Any whitespace character in the search term is allowed
to match at the logical position where a line break occurs (i.e. what
would normally be the newline character in a text file, but
PDF text does not include newline characters between lines).
- It won't match on text spanning more than two lines, i.e. it
only matches text spanning from the end of one line to the start
of the next line.
Part of issue #333
backend/djvu/djvu-document.c | 9 +++++
backend/pdf/ev-poppler.cc | 49 ++++++++++++++++++------
libdocument/ev-document-find.c | 26 ++++++++++++-
libdocument/ev-document-find.h | 17 +++++++++
libview/ev-jobs.c | 32 ++++++++++++++--
libview/ev-jobs.h | 2 +
libview/ev-view-private.h | 8 ++--
libview/ev-view.c | 84 ++++++++++++++++++++++++++++++++++++------
shell/ev-find-sidebar.c | 30 ++++++++++++---
9 files changed, 219 insertions(+), 38 deletions(-)
---
diff --git a/backend/djvu/djvu-document.c b/backend/djvu/djvu-document.c
index 45cf33d1..ba9c1eff 100644
--- a/backend/djvu/djvu-document.c
+++ b/backend/djvu/djvu-document.c
@@ -898,6 +898,15 @@ djvu_document_find_find_text (EvDocumentFind *document,
r->y1 = height - r->y2 * 72.0 / dpi;
r->y2 = height - tmp * 72.0 / dpi;
+
+ EvFindRectangle *ev_rect = ev_find_rectangle_new ();
+ ev_rect->x1 = r->x1;
+ ev_rect->x2 = r->x2;
+ ev_rect->y1 = r->y1;
+ ev_rect->y2 = r->y2;
+
+ ev_rectangle_free (r);
+ l->data = ev_rect;
}
diff --git a/backend/pdf/ev-poppler.cc b/backend/pdf/ev-poppler.cc
index 8f2e9804..935bc04b 100644
--- a/backend/pdf/ev-poppler.cc
+++ b/backend/pdf/ev-poppler.cc
@@ -157,6 +157,11 @@ static gboolean attachment_save_to_buffer (PopplerAttachment *attachment,
gchar **buffer,
gsize *buffer_size,
GError **error);
+static GList *pdf_document_find_find_text_with_options_real (EvDocumentFind *document_find,
+ EvPage *page,
+ const gchar *text,
+ EvFindOptions options,
+ gboolean returnFindRects);
EV_BACKEND_REGISTER_WITH_CODE (PdfDocument, pdf_document,
{
@@ -1950,6 +1955,7 @@ pdf_document_find_find_text_with_options (EvDocumentFind *document_find,
GList *matches, *l;
PopplerPage *poppler_page;
gdouble height;
+ gboolean uses_new_api;
GList *retval = NULL;
guint find_flags = 0;
@@ -1965,29 +1971,48 @@ pdf_document_find_find_text_with_options (EvDocumentFind *document_find,
to broaden our search in order to match on more expected results */
find_flags |= POPPLER_FIND_IGNORE_DIACRITICS;
#endif
+
if (options & EV_FIND_WHOLE_WORDS_ONLY)
find_flags |= POPPLER_FIND_WHOLE_WORDS_ONLY;
+
+#if POPPLER_CHECK_VERSION(0, 77, 0)
+ /* Allow to match on text across lines */
+ find_flags |= POPPLER_FIND_ACROSS_LINES;
+ matches = poppler_page_find_text_with_options2 (poppler_page, text, (PopplerFindFlags)find_flags);
+ uses_new_api = TRUE;
+#else
matches = poppler_page_find_text_with_options (poppler_page, text, (PopplerFindFlags)find_flags);
+ uses_new_api = FALSE;
+#endif
if (!matches)
return NULL;
poppler_page_get_size (poppler_page, NULL, &height);
for (l = matches; l && l->data; l = g_list_next (l)) {
- PopplerRectangle *rect = (PopplerRectangle *)l->data;
- EvRectangle *ev_rect;
-
- ev_rect = ev_rectangle_new ();
- ev_rect->x1 = rect->x1;
- ev_rect->x2 = rect->x2;
- /* Invert this for X-style coordinates */
- ev_rect->y1 = height - rect->y2;
- ev_rect->y2 = height - rect->y1;
-
+ EvFindRectangle *ev_rect = ev_find_rectangle_new ();
+ if (uses_new_api) {
+ PopplerFindRectangle *rect = (PopplerFindRectangle *)l->data;
+ ev_rect->x1 = rect->x1;
+ ev_rect->x2 = rect->x2;
+ ev_rect->y1 = height - rect->y2;
+ ev_rect->y2 = height - rect->y1;
+ ev_rect->next_line = rect->next_line;
+ ev_rect->after_hyphen = rect->after_hyphen;
+ } else {
+ PopplerRectangle *rect = (PopplerRectangle *)l->data;
+ ev_rect->x1 = rect->x1;
+ ev_rect->x2 = rect->x2;
+ /* Invert this for X-style coordinates */
+ ev_rect->y1 = height - rect->y2;
+ ev_rect->y2 = height - rect->y1;
+ ev_rect->next_line = FALSE;
+ ev_rect->after_hyphen = FALSE;
+ }
retval = g_list_prepend (retval, ev_rect);
}
- g_list_foreach (matches, (GFunc)poppler_rectangle_free, NULL);
- g_list_free (matches);
+ g_list_free_full (matches, (uses_new_api ? (GDestroyNotify) poppler_find_rectangle_free
+ : (GDestroyNotify) poppler_rectangle_free));
return g_list_reverse (retval);
}
diff --git a/libdocument/ev-document-find.c b/libdocument/ev-document-find.c
index 607a4957..1f750c38 100644
--- a/libdocument/ev-document-find.c
+++ b/libdocument/ev-document-find.c
@@ -36,7 +36,7 @@ ev_document_find_default_init (EvDocumentFindInterface *klass)
* @text: text to find
* @case_sensitive: whether to match the string case
*
- * Returns: (transfer full) (element-type EvRectangle): a list of results
+ * Returns: (transfer full) (element-type EvFindRectangle): a list of results
*/
GList *
ev_document_find_find_text (EvDocumentFind *document_find,
@@ -56,7 +56,7 @@ ev_document_find_find_text (EvDocumentFind *document_find,
* @text: text to find
* @options: a set of #EvFindOptions
*
- * Returns: (transfer full) (element-type EvRectangle): a list of results
+ * Returns: (transfer full) (element-type EvFindRectangle): a list of results
*/
GList *
ev_document_find_find_text_with_options (EvDocumentFind *document_find,
@@ -72,6 +72,28 @@ ev_document_find_find_text_with_options (EvDocumentFind *document_find,
return ev_document_find_find_text (document_find, page, text, options & EV_FIND_CASE_SENSITIVE);
}
+/* EvFindRectangle */
+G_DEFINE_BOXED_TYPE (EvFindRectangle, ev_find_rectangle, ev_find_rectangle_copy, ev_find_rectangle_free)
+
+EvFindRectangle *
+ev_find_rectangle_new (void)
+{
+ return g_slice_new0 (EvFindRectangle);
+}
+
+EvFindRectangle *
+ev_find_rectangle_copy (EvFindRectangle *rectangle)
+{
+ g_return_val_if_fail (rectangle != NULL, NULL);
+ return g_slice_dup (EvFindRectangle, rectangle);
+}
+
+void
+ev_find_rectangle_free (EvFindRectangle *rectangle)
+{
+ g_slice_free (EvFindRectangle, rectangle);
+}
+
EvFindOptions
ev_document_find_get_supported_options (EvDocumentFind *document_find)
{
diff --git a/libdocument/ev-document-find.h b/libdocument/ev-document-find.h
index f50ef0a2..3c882f7f 100644
--- a/libdocument/ev-document-find.h
+++ b/libdocument/ev-document-find.h
@@ -42,6 +42,23 @@ G_BEGIN_DECLS
typedef struct _EvDocumentFind EvDocumentFind;
typedef struct _EvDocumentFindInterface EvDocumentFindInterface;
+typedef struct _EvFindRectangle EvFindRectangle;
+
+#define EV_TYPE_FIND_RECTANGLE (ev_find_rectangle_get_type ())
+struct _EvFindRectangle
+{
+ gdouble x1;
+ gdouble y1;
+ gdouble x2;
+ gdouble y2;
+ gboolean next_line;
+ gboolean after_hyphen;
+};
+
+GType ev_find_rectangle_get_type (void) G_GNUC_CONST;
+EvFindRectangle *ev_find_rectangle_new (void);
+EvFindRectangle *ev_find_rectangle_copy (EvFindRectangle *ev_find_rect);
+void ev_find_rectangle_free (EvFindRectangle *ev_find_rect);
typedef enum {
EV_FIND_DEFAULT = 0,
diff --git a/libview/ev-jobs.c b/libview/ev-jobs.c
index f24808b0..3a8c3b25 100644
--- a/libview/ev-jobs.c
+++ b/libview/ev-jobs.c
@@ -1640,8 +1640,7 @@ ev_job_find_dispose (GObject *object)
gint i;
for (i = 0; i < job->n_pages; i++) {
- g_list_foreach (job->pages[i], (GFunc)ev_rectangle_free, NULL);
- g_list_free (job->pages[i]);
+ g_list_free_full (job->pages[i], (GDestroyNotify)ev_find_rectangle_free);
}
g_free (job->pages);
@@ -1779,6 +1778,33 @@ ev_job_find_get_n_results (EvJobFind *job,
return g_list_length (job->pages[page]);
}
+/**
+ * ev_job_find_get_n_main_results:
+ * @job: an #EvJobFind job
+ * @page: number of the page we want to count its match results.
+ *
+ * This is similar to ev_job_find_get_n_results() but it does not
+ * count the results where <next_line> field is TRUE, i.e. the
+ * results that mark the next-line part of an across-line match.
+ *
+ * Returns: total number of match results
+ * (i.e. results which are not a next-line part) in @page
+ */
+gint
+ev_job_find_get_n_main_results (EvJobFind *job,
+ gint page)
+{
+ GList *l;
+ int n = 0;
+
+ for (l = job->pages[page]; l; l = l->next) {
+ if ( !((EvFindRectangle *) l->data)->next_line )
+ n++;
+ }
+
+ return n;
+}
+
gdouble
ev_job_find_get_progress (EvJobFind *job)
{
@@ -1808,7 +1834,7 @@ ev_job_find_has_results (EvJobFind *job)
* ev_job_find_get_results: (skip)
* @job: an #EvJobFind
*
- * Returns: a #GList of #GList<!-- -->s containing #EvRectangle<!-- -->s
+ * Returns: a #GList of #GList<!-- -->s containing #EvFindRectangle<!-- -->s
*/
GList **
ev_job_find_get_results (EvJobFind *job)
diff --git a/libview/ev-jobs.h b/libview/ev-jobs.h
index 9a197c96..41dd2f02 100644
--- a/libview/ev-jobs.h
+++ b/libview/ev-jobs.h
@@ -604,6 +604,8 @@ EvJob *ev_job_find_new (EvDocument *document,
void ev_job_find_set_options (EvJobFind *job,
EvFindOptions options);
EvFindOptions ev_job_find_get_options (EvJobFind *job);
+gint ev_job_find_get_n_main_results (EvJobFind *job,
+ gint pages);
gint ev_job_find_get_n_results (EvJobFind *job,
gint pages);
gdouble ev_job_find_get_progress (EvJobFind *job);
diff --git a/libview/ev-view-private.h b/libview/ev-view-private.h
index 02562ddd..a04c35f3 100644
--- a/libview/ev-view-private.h
+++ b/libview/ev-view-private.h
@@ -141,9 +141,11 @@ struct _EvView {
/* Find */
EvJobFind *find_job;
- GList **find_pages; /* Backwards compatibility */
- gint find_page;
- gint find_result;
+ GList **find_pages; /* Backwards compatibility. Contains EvFindRectangles's elements per page */
+ gint find_page; /* Page of current find result */
+ gint find_result; /* Index of current find result on find_pages[find_page]. For matches across
+ * two lines (which comprise two EvFindRectangle's), this will always point
+ * to the first one, i.e. the one where rect->next_line is FALSE */
gboolean jump_to_find_result;
gboolean highlight_find_results;
diff --git a/libview/ev-view.c b/libview/ev-view.c
index 31d7caef..ef8a75de 100644
--- a/libview/ev-view.c
+++ b/libview/ev-view.c
@@ -288,7 +288,7 @@ static void ev_view_handle_cursor_over_xy (EvView *view,
/*** Find ***/
static gint ev_view_find_get_n_results (EvView *view,
gint page);
-static EvRectangle *ev_view_find_get_result (EvView *view,
+static EvFindRectangle *ev_view_find_get_result (EvView *view,
gint page,
gint result);
static void jump_to_find_result (EvView *view);
@@ -6936,25 +6936,33 @@ highlight_find_results (EvView *view,
cairo_t *cr,
int page)
{
+ EvRectangle *rectangle;
gint i, n_results = 0;
n_results = ev_view_find_get_n_results (view, page);
+ rectangle = ev_rectangle_new ();
for (i = 0; i < n_results; i++) {
- EvRectangle *rectangle;
+ EvFindRectangle *find_rect;
GdkRectangle view_rectangle;
gdouble alpha;
- if (i == view->find_result && page == view->find_page) {
+ find_rect = ev_view_find_get_result (view, page, i);
+ if (page == view->find_page && (i == view->find_result ||
+ (find_rect->next_line && i == view->find_result + 1))) {
alpha = 0.6;
} else {
alpha = 0.3;
}
-
- rectangle = ev_view_find_get_result (view, page, i);
+ rectangle->x1 = find_rect->x1;
+ rectangle->x2 = find_rect->x2;
+ rectangle->y1 = find_rect->y1;
+ rectangle->y2 = find_rect->y2;
_ev_view_transform_doc_rect_to_view_rect (view, page, rectangle, &view_rectangle);
draw_rubberband (view, cr, &view_rectangle, alpha);
}
+
+ ev_rectangle_free (rectangle);
}
static void
@@ -9082,32 +9090,80 @@ ev_view_find_get_n_results (EvView *view, gint page)
return view->find_pages ? g_list_length (view->find_pages[page]) : 0;
}
-static EvRectangle *
+static EvFindRectangle *
ev_view_find_get_result (EvView *view, gint page, gint result)
{
- return view->find_pages ? (EvRectangle *) g_list_nth_data (view->find_pages[page], result) : NULL;
+ return view->find_pages ? (EvFindRectangle *) g_list_nth_data (view->find_pages[page], result) : NULL;
+}
+
+static EvFindRectangle *
+ev_view_find_get_result_and_next (EvView *view, gint page, gint result, EvFindRectangle **next_rect)
+{
+ GList *elem;
+
+ if (!view->find_pages)
+ return NULL;
+
+ elem = g_list_nth (view->find_pages[page], result);
+ if (!elem)
+ return NULL;
+
+ if (elem->next)
+ *next_rect = (EvFindRectangle *) elem->next->data;
+
+ return (EvFindRectangle *) elem->data;
+}
+
+static gboolean
+ev_view_find_is_next_line (EvView *view, gint page, gint result)
+{
+ if (!view->find_pages)
+ return FALSE;
+
+ GList *elem = g_list_nth (view->find_pages[page], result);
+ if (!elem)
+ return FALSE;
+
+ return ((EvFindRectangle *) elem->data)->next_line;
}
static void
jump_to_find_result (EvView *view)
{
+ EvRectangle *rect;
gint n_results;
gint page = view->find_page;
n_results = ev_view_find_get_n_results (view, page);
+ rect = ev_rectangle_new ();
if (n_results > 0 && view->find_result < n_results) {
- EvRectangle *rect;
+ EvFindRectangle *find_rect, *rect_next;
GdkRectangle view_rect;
- rect = ev_view_find_get_result (view, page, view->find_result);
+ rect_next = NULL;
+ find_rect = ev_view_find_get_result_and_next (view, page, view->find_result, &rect_next);
+ if (rect_next && rect_next->next_line) {
+ /* For an across-lines match, make sure both rectangles are visible */
+ rect->x1 = MIN (find_rect->x1, rect_next->x1);
+ rect->y1 = MIN (find_rect->y1, rect_next->y1);
+ rect->x2 = MAX (find_rect->x2, rect_next->x2);
+ rect->y2 = MAX (find_rect->y2, rect_next->y2);
+ } else {
+ rect->x1 = find_rect->x1;
+ rect->y1 = find_rect->y1;
+ rect->x2 = find_rect->x2;
+ rect->y2 = find_rect->y2;
+ }
_ev_view_transform_doc_rect_to_view_rect (view, page, rect, &view_rect);
_ev_view_ensure_rectangle_is_visible (view, &view_rect);
if (view->caret_enabled && view->rotation == 0)
- position_caret_cursor_at_doc_point (view, page, rect->x1, rect->y1);
+ position_caret_cursor_at_doc_point (view, page, find_rect->x1, find_rect->y1);
view->jump_to_find_result = FALSE;
}
+
+ ev_rectangle_free (rect);
}
/**
@@ -9233,7 +9289,8 @@ ev_view_find_next (EvView *view)
gint n_results;
n_results = ev_view_find_get_n_results (view, view->find_page);
- view->find_result++;
+ view->find_result += ev_view_find_is_next_line (view, view->find_page, view->find_result + 1)
+ ? 2 : 1;
if (view->find_result >= n_results) {
view->find_result = 0;
@@ -9249,11 +9306,14 @@ ev_view_find_next (EvView *view)
void
ev_view_find_previous (EvView *view)
{
- view->find_result--;
+ view->find_result -= ev_view_find_is_next_line (view, view->find_page, view->find_result - 1)
+ ? 2 : 1;
if (view->find_result < 0) {
jump_to_find_page (view, EV_VIEW_FIND_PREV, -1);
view->find_result = MAX (0, ev_view_find_get_n_results (view, view->find_page) - 1);
+ if (view->find_result && ev_view_find_is_next_line (view, view->find_page, view->find_result))
+ view->find_result--; /* set to last "non-nextline" result */
} else if (view->find_page != view->current_page) {
jump_to_find_page (view, EV_VIEW_FIND_PREV, 0);
}
diff --git a/shell/ev-find-sidebar.c b/shell/ev-find-sidebar.c
index 25b1d842..5a046ce8 100644
--- a/shell/ev-find-sidebar.c
+++ b/shell/ev-find-sidebar.c
@@ -268,7 +268,7 @@ ev_find_sidebar_highlight_first_match_of_page (EvFindSidebar *sidebar,
return;
for (i = 0; i < page; i++)
- index += ev_job_find_get_n_results (priv->job, i);
+ index += ev_job_find_get_n_main_results (priv->job, i);
if (priv->highlighted_result)
gtk_tree_path_free (priv->highlighted_result);
@@ -339,7 +339,9 @@ get_surrounding_text_markup (const gchar *text,
gboolean case_sensitive,
PangoLogAttr *log_attrs,
gint log_attrs_length,
- gint offset)
+ gint offset,
+ gboolean has_nextline,
+ gboolean hyphen_was_ignored)
{
gint iter;
gchar *prec = NULL;
@@ -356,7 +358,15 @@ get_surrounding_text_markup (const gchar *text,
iter = offset;
offset += g_utf8_strlen (find_text, -1);
- if (!case_sensitive)
+
+ if (has_nextline || g_utf8_offset_to_pointer (text, offset-1)[0] == '\n') {
+ if (has_nextline) {
+ offset += 1; /* for newline */
+ if (hyphen_was_ignored)
+ offset += 1; /* for hyphen */
+ }
+ match = sanitized_substring (text, iter, offset);
+ } else if (!case_sensitive)
match = g_utf8_substring (text, iter, offset);
iter = MIN (log_attrs_length, offset + 1);
@@ -409,7 +419,7 @@ get_page_text (EvDocument *document,
static gint
get_match_offset (EvRectangle *areas,
guint n_areas,
- EvRectangle *match,
+ EvFindRectangle *match,
gint offset)
{
gdouble x, y;
@@ -489,9 +499,13 @@ process_matches_idle (EvFindSidebar *sidebar)
offset = 0;
for (l = matches, result = 0; l; l = g_list_next (l), result++) {
- EvRectangle *match = (EvRectangle *)l->data;
+ EvFindRectangle *match = (EvFindRectangle *)l->data;
gchar *markup;
GtkTreeIter iter;
+ gboolean has_nextline, hyphen_ignored;
+
+ if (match->next_line)
+ continue;
offset = get_match_offset (areas, n_areas, match, offset);
if (offset == -1) {
@@ -508,12 +522,16 @@ process_matches_idle (EvFindSidebar *sidebar)
priv->insert_position++;
}
+ has_nextline = l->next && ((EvFindRectangle *)l->next->data)->next_line;
+ hyphen_ignored = l->next && ((EvFindRectangle *)l->next->data)->after_hyphen;
markup = get_surrounding_text_markup (page_text,
priv->job->text,
priv->job->case_sensitive,
text_log_attrs,
text_log_attrs_length,
- offset);
+ offset,
+ has_nextline,
+ hyphen_ignored);
gtk_list_store_set (GTK_LIST_STORE (model), &iter,
TEXT_COLUMN, markup,
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]