[geary/mjog/search-update: 20/29] Geary.ImapDb.SearchQuery: Use expression to generate FTS5 queries
- From: Michael Gratton <mjog src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [geary/mjog/search-update: 20/29] Geary.ImapDb.SearchQuery: Use expression to generate FTS5 queries
- Date: Fri, 6 Nov 2020 07:04:11 +0000 (UTC)
commit 6f8b69b48391cf5d9803e29f9a1af15755b888eb
Author: Michael Gratton <mike vee net>
Date: Wed Nov 4 19:36:39 2020 +1100
Geary.ImapDb.SearchQuery: Use expression to generate FTS5 queries
Move SQL generation for FTS search from ImapDb.Account to SearchQuery.
Convert to use Geary.SearchQuery.Term instances to generate SQL, rather
than parsing the expression. Simplify the generated SQL substantially
and generate MATCH values that work with SQLite FTS5.
src/engine/imap-db/imap-db-account.vala | 358 ++-------
src/engine/imap-db/imap-db-search-query.vala | 824 ++++++---------------
.../imap-engine/imap-engine-generic-account.vala | 4 +-
test/engine/imap-db/imap-db-search-query-test.vala | 199 +++++
test/meson.build | 1 +
test/test-engine.vala | 1 +
6 files changed, 480 insertions(+), 907 deletions(-)
---
diff --git a/src/engine/imap-db/imap-db-account.vala b/src/engine/imap-db/imap-db-account.vala
index 7514aeba8..13134d3fd 100644
--- a/src/engine/imap-db/imap-db-account.vala
+++ b/src/engine/imap-db/imap-db-account.vala
@@ -543,36 +543,6 @@ private class Geary.ImapDB.Account : BaseObject {
return (messages.size == 0 ? null : messages);
}
- private void sql_add_query_phrases(StringBuilder sql, Gee.HashMap<string, string> query_phrases,
- string operator, string columns, string condition) {
- bool is_first_field = true;
- foreach (string field in query_phrases.keys) {
- if (!is_first_field)
- sql.append_printf("""
- %s
- SELECT %s
- FROM MessageSearchTable
- WHERE %s
- MATCH ?
- %s
- """, operator, columns, field, condition);
- else
- sql.append_printf(" AND %s MATCH ?", field);
- is_first_field = false;
- }
- }
-
- private int sql_bind_query_phrases(Db.Statement stmt, int start_index,
- Gee.HashMap<string, string> query_phrases) throws Geary.DatabaseError {
- int i = start_index;
- // This relies on the keys being returned in the same order every time
- // from the same map. It might not be guaranteed, but I feel pretty
- // confident it'll work unless you change the map in between.
- foreach (string field in query_phrases.keys)
- stmt.bind_string(i++, query_phrases.get(field));
- return i - start_index;
- }
-
// Append each id in the collection to the StringBuilder, in a format
// suitable for use in an SQL statement IN (...) clause.
private void sql_append_ids(StringBuilder s, Gee.Iterable<int64?> ids) {
@@ -612,91 +582,24 @@ private class Geary.ImapDB.Account : BaseObject {
Gee.Collection<Geary.EmailIdentifier>? search_ids = null, Cancellable? cancellable = null)
throws Error {
- debug("Search terms, offset/limit: %s %d/%d",
- q.to_string(), offset, limit);
+ debug("Search query: %s", q.to_string());
check_open();
ImapDB.SearchQuery query = check_search_query(q);
- Gee.HashMap<string, string> query_phrases = query.get_query_phrases();
- Gee.Map<Geary.NamedFlag, bool> removal_conditions = query.get_removal_conditions();
- if (query_phrases.size == 0 && removal_conditions.is_empty)
- return null;
-
- foreach (string? field in query.get_fields()) {
- debug(" - Field \"%s\" terms:", field);
- foreach (SearchQuery.Term? term in query.get_search_terms(field)) {
- if (term != null) {
- debug(" - \"%s\": %s, %s",
- term.original,
- term.parsed,
- term.stemmed
- );
- debug(" SQL terms:");
- foreach (string sql in term.sql) {
- debug(" - \"%s\"", sql);
- }
- }
- }
- }
-
// Do this outside of transaction to catch invalid search ids up-front
string? search_ids_sql = get_search_ids_sql(search_ids);
- bool strip_greedy = query.should_strip_greedy_results();
Gee.List<EmailIdentifier> matching_ids =
new Gee.LinkedList<EmailIdentifier>();
Gee.Map<EmailIdentifier,Gee.Set<string>>? search_matches = null;
- yield db.exec_transaction_async(Db.TransactionType.RO, (cx) => {
- string blacklisted_ids_sql = do_get_blacklisted_message_ids_sql(
- folder_blacklist, cx, cancellable);
-
- // Every mutation of this query we could think of has been tried,
- // and this version was found to minimize running time. We
- // discovered that just doing a JOIN between the MessageTable and
- // MessageSearchTable was causing a full table scan to order the
- // results. When it's written this way, and we force SQLite to use
- // the correct index (not sure why it can't figure it out on its
- // own), it cuts the running time roughly in half of how it was
- // before. The short version is: modify with extreme caution. See
- // <http://redmine.yorba.org/issues/7372>.
- StringBuilder sql = new StringBuilder();
- sql.append("""
- SELECT id
- FROM MessageTable
- INDEXED BY MessageTableInternalDateTimeTIndex
- """);
- if (query_phrases.size != 0) {
- sql.append("""
- WHERE id IN (
- SELECT rowid
- FROM MessageSearchTable
- WHERE 1=1
- """);
- sql_add_query_phrases(sql, query_phrases, "INTERSECT", "rowid", "");
- sql.append(")");
- } else
- sql.append(" WHERE 1=1");
-
- if (blacklisted_ids_sql != "")
- sql.append(" AND id NOT IN (%s)".printf(blacklisted_ids_sql));
- if (!Geary.String.is_empty(search_ids_sql))
- sql.append(" AND id IN (%s)".printf(search_ids_sql));
- sql.append(" ORDER BY internaldate_time_t DESC");
- if (limit > 0)
- sql.append(" LIMIT ? OFFSET ?");
-
- Db.Statement stmt = cx.prepare(sql.str);
- int bind_index = sql_bind_query_phrases(stmt, 0, query_phrases);
- if (limit > 0) {
- stmt.bind_int(bind_index++, limit);
- stmt.bind_int(bind_index++, offset);
- }
-
- Gee.HashMap<int64?, ImapDB.EmailIdentifier> id_map = new Gee.HashMap<int64?,
ImapDB.EmailIdentifier>(
+ yield db.exec_transaction_async(RO, (cx) => {
+ var id_map = new Gee.HashMap<int64?, ImapDB.EmailIdentifier>(
Collection.int64_hash_func, Collection.int64_equal_func);
-
+ Db.Statement stmt = query.get_search_query(
+ cx, search_ids_sql, folder_blacklist, limit, offset, cancellable
+ );
Db.Result result = stmt.exec(cancellable);
while (!result.finished) {
int64 message_id = result.int64_at(0);
@@ -707,8 +610,7 @@ private class Geary.ImapDB.Account : BaseObject {
result.next(cancellable);
}
-
- if (strip_greedy && !id_map.is_empty) {
+ if (query.has_stemmed_terms && !id_map.is_empty) {
search_matches = do_get_search_matches(
cx, query, id_map, cancellable
);
@@ -719,13 +621,7 @@ private class Geary.ImapDB.Account : BaseObject {
debug("Matching emails found: %d", matching_ids.size);
- if (!removal_conditions.is_empty) {
- yield strip_removal_conditions(
- query, matching_ids, removal_conditions, cancellable
- );
- }
-
- if (strip_greedy && search_matches != null) {
+ if (query.has_stemmed_terms && search_matches != null) {
strip_greedy_results(query, matching_ids, search_matches);
}
@@ -733,40 +629,6 @@ private class Geary.ImapDB.Account : BaseObject {
return matching_ids.is_empty ? null : matching_ids;
}
- // Strip out from the given collection any email that matches the
- // given removal conditions
- private async void strip_removal_conditions(ImapDB.SearchQuery query,
- Gee.Collection<EmailIdentifier> matches,
- Gee.Map<Geary.NamedFlag,bool> conditions,
- GLib.Cancellable? cancellable = null)
- throws GLib.Error {
- Email.Field required_fields = Geary.Email.Field.FLAGS;
- Gee.Iterator<EmailIdentifier> iter = matches.iterator();
-
- yield db.exec_transaction_async(RO, (cx) => {
- while (iter.next()) {
- ImapDB.EmailIdentifier id = iter.get();
- MessageRow row = Geary.ImapDB.Folder.do_fetch_message_row(
- cx, id.message_id, required_fields, null, cancellable
- );
- Geary.EmailFlags? flags = row.get_generic_email_flags();
- if (flags != null) {
- foreach (Gee.Map.Entry<NamedFlag,bool> condition
- in conditions.entries) {
- if (flags.contains(condition.key) == condition.value) {
- iter.remove();
- break;
- }
- }
- } else {
- iter.remove();
- }
- }
- return Db.TransactionOutcome.DONE;
- }, cancellable
- );
- }
-
// Strip out from the given collection of matching ids and results
// for any search results that only contain a hit due to "greedy"
// matching of the stemmed variants on all search terms.
@@ -774,46 +636,46 @@ private class Geary.ImapDB.Account : BaseObject {
Gee.Collection<EmailIdentifier> matches,
Gee.Map<EmailIdentifier,Gee.Set<string>> results) {
int prestripped_results = matches.size;
- Gee.Iterator<EmailIdentifier> iter = matches.iterator();
- while (iter.next()) {
- // For each matched string in this message, retain the message in the search results
- // if it prefix-matches any of the straight-up parsed terms or matches a stemmed
- // variant (with only max. difference in their lengths allowed, i.e. not a "greedy"
- // match)
- EmailIdentifier id = iter.get();
- bool good_match_found = false;
- Gee.Set<string>? result = results.get(id);
- if (result != null) {
- foreach (string match in result) {
- foreach (SearchQuery.Term term in query.get_all_terms()) {
- // if prefix-matches parsed term, then don't strip
- if (match.has_prefix(term.parsed)) {
- good_match_found = true;
- break;
- }
-
- // if prefix-matches stemmed term w/o doing so
- // greedily, then don't strip
- if (term.stemmed != null && match.has_prefix(term.stemmed)) {
- int diff = match.length - term.stemmed.length;
- if (diff <= query.max_difference_match_stem_lengths) {
- good_match_found = true;
- break;
- }
- }
- }
- }
-
- if (good_match_found) {
- break;
- }
- }
-
- if (!good_match_found) {
- iter.remove();
- matches.remove(id);
- }
- }
+ // Gee.Iterator<EmailIdentifier> iter = matches.iterator();
+ // while (iter.next()) {
+ // // For each matched string in this message, retain the message in the search results
+ // // if it prefix-matches any of the straight-up parsed terms or matches a stemmed
+ // // variant (with only max. difference in their lengths allowed, i.e. not a "greedy"
+ // // match)
+ // EmailIdentifier id = iter.get();
+ // bool good_match_found = false;
+ // Gee.Set<string>? result = results.get(id);
+ // if (result != null) {
+ // foreach (string match in result) {
+ // foreach (SearchQuery.Term term in query.get_all_terms()) {
+ // // if prefix-matches parsed term, then don't strip
+ // if (match.has_prefix(term.parsed)) {
+ // good_match_found = true;
+ // break;
+ // }
+
+ // // if prefix-matches stemmed term w/o doing so
+ // // greedily, then don't strip
+ // if (term.stemmed != null && match.has_prefix(term.stemmed)) {
+ // int diff = match.length - term.stemmed.length;
+ // if (diff <= query.max_difference_match_stem_lengths) {
+ // good_match_found = true;
+ // break;
+ // }
+ // }
+ // }
+ // }
+
+ // if (good_match_found) {
+ // break;
+ // }
+ // }
+
+ // if (!good_match_found) {
+ // iter.remove();
+ // matches.remove(id);
+ // }
+ // }
debug("Stripped %d emails from search for [%s] due to greedy stem matching",
prestripped_results - matches.size, query.raw);
@@ -831,12 +693,15 @@ private class Geary.ImapDB.Account : BaseObject {
foreach (ImapDB.EmailIdentifier id in ids)
id_map.set(id.message_id, id);
- Gee.Map<ImapDB.EmailIdentifier, Gee.Set<string>>? match_map =
- do_get_search_matches(cx, query, id_map, cancellable);
- if (match_map == null || match_map.size == 0)
+ Gee.Map<ImapDB.EmailIdentifier, Gee.Set<string>>? match_map = null;
+ if (!id_map.is_empty) {
+ match_map = do_get_search_matches(cx, query, id_map, cancellable);
+ }
+ if (match_map == null || match_map.size == 0) {
return Db.TransactionOutcome.DONE;
+ }
- if (query.should_strip_greedy_results()) {
+ if (query.has_stemmed_terms) {
strip_greedy_results(query, ids, match_map);
}
@@ -1219,68 +1084,6 @@ private class Geary.ImapDB.Account : BaseObject {
return !result.finished;
}
- // Turn the collection of folder paths into actual folder ids. As a
- // special case, if "folderless" or orphan emails are to be blacklisted,
- // set the out bool to true.
- private Gee.Collection<int64?> do_get_blacklisted_folder_ids(Gee.Collection<Geary.FolderPath?>?
folder_blacklist,
- Db.Connection cx, out bool blacklist_folderless, Cancellable? cancellable) throws Error {
- blacklist_folderless = false;
- Gee.ArrayList<int64?> ids = new Gee.ArrayList<int64?>();
-
- if (folder_blacklist != null) {
- foreach (Geary.FolderPath? folder_path in folder_blacklist) {
- if (folder_path == null) {
- blacklist_folderless = true;
- } else {
- int64 id;
- do_fetch_folder_id(cx, folder_path, true, out id, cancellable);
- if (id != Db.INVALID_ROWID)
- ids.add(id);
- }
- }
- }
-
- return ids;
- }
-
- // Return a parameterless SQL statement that selects any message ids that
- // are in a blacklisted folder. This is used as a sub-select for the
- // search query to omit results from blacklisted folders.
- private string do_get_blacklisted_message_ids_sql(Gee.Collection<Geary.FolderPath?>? folder_blacklist,
- Db.Connection cx, Cancellable? cancellable) throws Error {
- bool blacklist_folderless;
- Gee.Collection<int64?> blacklisted_ids = do_get_blacklisted_folder_ids(
- folder_blacklist, cx, out blacklist_folderless, cancellable);
-
- StringBuilder sql = new StringBuilder();
- if (blacklisted_ids.size > 0) {
- sql.append("""
- SELECT message_id
- FROM MessageLocationTable
- WHERE remove_marker = 0
- AND folder_id IN (
- """);
- sql_append_ids(sql, blacklisted_ids);
- sql.append(")");
-
- if (blacklist_folderless)
- sql.append(" UNION ");
- }
- if (blacklist_folderless) {
- sql.append("""
- SELECT id
- FROM MessageTable
- WHERE id NOT IN (
- SELECT message_id
- FROM MessageLocationTable
- WHERE remove_marker = 0
- )
- """);
- }
-
- return sql.str;
- }
-
// For a message row id, return a set of all folders it's in, or null if
// it's not in any folders.
private Gee.Set<Geary.FolderPath>?
@@ -1406,42 +1209,22 @@ private class Geary.ImapDB.Account : BaseObject {
// Not using a MultiMap because when traversing want to process all values at once per iteration,
// not per key-value
- public Gee.Map<ImapDB.EmailIdentifier, Gee.Set<string>>? do_get_search_matches(Db.Connection cx,
- ImapDB.SearchQuery query, Gee.Map<int64?, ImapDB.EmailIdentifier> id_map, Cancellable? cancellable)
- throws Error {
- if (id_map.size == 0)
- return null;
-
- Gee.HashMap<string, string> query_phrases = query.get_query_phrases();
- if (query_phrases.size == 0)
- return null;
-
- StringBuilder sql = new StringBuilder();
- sql.append("""
- SELECT rowid, geary_matches(MessageSearchTable), *
- FROM MessageSearchTable
- WHERE rowid IN (
- """);
- sql_append_ids(sql, id_map.keys);
- sql.append(")");
-
- StringBuilder condition = new StringBuilder("AND rowid IN (");
- sql_append_ids(condition, id_map.keys);
- condition.append(")");
- sql_add_query_phrases(sql, query_phrases, "UNION", "rowid, geary_matches(MessageSearchTable), *",
- condition.str);
-
- Db.Statement stmt = cx.prepare(sql.str);
- sql_bind_query_phrases(stmt, 0, query_phrases);
-
- var search_matches =
- new Gee.HashMap<ImapDB.EmailIdentifier,Gee.Set<string>>();
+ public Gee.Map<ImapDB.EmailIdentifier, Gee.Set<string>> do_get_search_matches(
+ Db.Connection cx,
+ ImapDB.SearchQuery query,
+ Gee.Map<int64?, ImapDB.EmailIdentifier> id_map,
+ GLib.Cancellable? cancellable
+ ) throws GLib.Error {
+ var search_ids_sql = new GLib.StringBuilder();
+ sql_append_ids(search_ids_sql, id_map.keys);
- Db.Result result = stmt.exec(cancellable);
+ var search_matches = new Gee.HashMap<ImapDB.EmailIdentifier,Gee.Set<string>>();
+ Db.Result result = query.get_match_query(
+ cx, search_ids_sql.str
+ ).exec(cancellable);
while (!result.finished) {
- int64 rowid = result.rowid_at(0);
- assert(id_map.has_key(rowid));
- ImapDB.EmailIdentifier id = id_map.get(rowid);
+ int64 docid = result.rowid_at(0);
+ ImapDB.EmailIdentifier id = id_map.get(docid);
// XXX Avoid a crash when "database disk image is
// malformed" error occurs. Remove this when the SQLite
@@ -1502,8 +1285,9 @@ private class Geary.ImapDB.Account : BaseObject {
private ImapDB.SearchQuery check_search_query(Geary.SearchQuery q) throws Error {
ImapDB.SearchQuery? query = q as ImapDB.SearchQuery;
- if (query == null || query.account != this)
+ if (query == null) {
throw new EngineError.BAD_PARAMETERS("Geary.SearchQuery not associated with %s", name);
+ }
return query;
}
diff --git a/src/engine/imap-db/imap-db-search-query.vala b/src/engine/imap-db/imap-db-search-query.vala
index 9e2c38e97..e03e2d0ff 100644
--- a/src/engine/imap-db/imap-db-search-query.vala
+++ b/src/engine/imap-db/imap-db-search-query.vala
@@ -1,6 +1,6 @@
/*
- * Copyright 2016 Software Freedom Conservancy Inc.
- * Copyright 2019 Michael Gratton <mike vee net>.
+ * Copyright © 2016 Software Freedom Conservancy Inc.
+ * Copyright © 2019-2020 Michael Gratton <mike vee net>.
*
* This software is licensed under the GNU Lesser General Public License
* (version 2.1 or later). See the COPYING file in this distribution.
@@ -11,619 +11,104 @@
*/
private class Geary.ImapDB.SearchQuery : Geary.SearchQuery {
- // These characters are chosen for being commonly used to continue a single word (such as
- // extended last names, i.e. "Lars-Eric") or in terms commonly searched for in an email client,
- // i.e. unadorned mailbox addresses. Note that characters commonly used for wildcards or that
- // would be interpreted as wildcards by SQLite are not included here.
- private const unichar[] SEARCH_TERM_CONTINUATION_CHARS = { '-', '_', '.', '@' };
- // Search operator field names, eg: "to:foo example com" or "is:unread"
- private const string SEARCH_OP_ATTACHMENT = "attachments";
- private const string SEARCH_OP_BCC = "bcc";
- private const string SEARCH_OP_BODY = "body";
- private const string SEARCH_OP_CC = "cc";
- private const string SEARCH_OP_FROM = "\"from\"";
- private const string SEARCH_OP_IS = "is";
- private const string SEARCH_OP_SUBJECT = "subject";
- private const string SEARCH_OP_TO = "receivers";
+ private const string EMAIL_TEXT_STEMMED_TERMS = "geary-stemmed-terms";
- // Operators allowing finding mail addressed to "me"
- private const string[] SEARCH_OP_TO_ME_FIELDS = {
- SEARCH_OP_BCC,
- SEARCH_OP_CC,
- SEARCH_OP_TO,
- };
- // The addressable op value for "me"
- private const string SEARCH_OP_ADDRESSABLE_VALUE_ME = "me";
-
- // Search operator field values
- private const string SEARCH_OP_VALUE_READ = "read";
- private const string SEARCH_OP_VALUE_STARRED = "starred";
- private const string SEARCH_OP_VALUE_UNREAD = "unread";
-
-
- /**
- * Various associated state with a single term in a search query.
- */
- internal class Term : GLib.Object {
-
- /**
- * The original tokenized search term with minimal other processing performed.
- *
- * For example, punctuation might be removed, but no casefolding has occurred.
- */
- public string original { get; private set; }
-
- /**
- * The parsed tokenized search term.
- *
- * Casefolding and other normalizing text operations have been performed.
- */
- public string parsed { get; private set; }
-
- /**
- * The stemmed search term.
- *
- * Only used if stemming is being done ''and'' the stem is different than the {@link parsed}
- * term.
- */
- public string? stemmed { get; private set; }
-
- /**
- * A list of terms ready for binding to an SQLite statement.
- *
- * This should include prefix operators and quotes (i.e. ["party"] or [party*]). These texts
- * are guaranteed not to be null or empty strings.
- */
- public Gee.List<string> sql { get; private set; default = new Gee.ArrayList<string>(); }
-
- /**
- * Returns true if the {@link parsed} term is exact-match only (i.e. starts with quotes) and
- * there is no {@link stemmed} variant.
- */
- public bool is_exact { get { return parsed.has_prefix("\"") && stemmed == null; } }
-
- public Term(string original, string parsed, string? stemmed, string? sql_parsed, string?
sql_stemmed) {
- this.original = original;
- this.parsed = parsed;
- this.stemmed = stemmed;
-
- // for now, only two variations: the parsed string and the stemmed; since stem is usually
- // shorter (and will be first in the OR statement), include it first
- if (!String.is_empty(sql_stemmed))
- sql.add(sql_stemmed);
-
- if (!String.is_empty(sql_parsed))
- sql.add(sql_parsed);
- }
- }
-
- private Geary.SearchQuery.Strategy strategy;
-
- // Maps of localised search operator names and values to their
- // internal forms
- private static Gee.HashMap<string, string> search_op_names =
- new Gee.HashMap<string, string>();
- private static Gee.ArrayList<string> search_op_to_me_values =
- new Gee.ArrayList<string>();
- private static Gee.ArrayList<string> search_op_from_me_values =
- new Gee.ArrayList<string>();
- private static Gee.HashMap<string, string> search_op_is_values =
- new Gee.HashMap<string, string>();
-
-
- static construct {
- // Map of possibly translated search operator names and values
- // to English/internal names and values. We include the
- // English version anyway so that when translations provide a
- // localised version of the operator names but have not also
- // translated the user manual, the English version in the
- // manual still works.
-
- // Can be typed in the search box like "attachment:file.txt"
- // to find messages with attachments with a particular name.
- //
- // The translated string must be a single word (use '-', '_'
- // or similar to combine words into one), should be short, and
- // also match the translation in "search.page" of the Geary User
- // Guide.
- search_op_names.set(C_("Search operator", "attachment"), SEARCH_OP_ATTACHMENT);
- // Can be typed in the search box like
- // "bcc:johndoe example com" to find messages bcc'd to a
- // particular person.
- //
- // The translated string must be a single word (use '-', '_'
- // or similar to combine words into one), should be short, and
- // also match the translation in "search.page" of the Geary User
- // Guide.
- search_op_names.set(C_("Search operator", "bcc"), SEARCH_OP_BCC);
- // Can be typed in the search box like "body:word" to find
- // "word" only if it occurs in the body of a message.
- //
- // The translated string must be a single word (use '-', '_'
- // or similar to combine words into one), should be short, and
- // also match the translation in "search.page" of the Geary User
- // Guide.
- search_op_names.set(C_("Search operator", "body"), SEARCH_OP_BODY);
- // Can be typed in the search box like
- // "cc:johndoe example com" to find messages cc'd to a
- // particular person.
- //
- // The translated string must be a single word (use '-', '_'
- // or similar to combine words into one), should be short, and
- // also match the translation in "search.page" of the Geary User
- // Guide.
- search_op_names.set(C_("Search operator", "cc"), SEARCH_OP_CC);
- // Can be typed in the search box like
- // "from:johndoe example com" to find messages from a
- // particular sender.
- //
- // The translated string must be a single word (use '-', '_'
- // or similar to combine words into one), should be short, and
- // also match the translation in "search.page" of the Geary User
- // Guide.
- search_op_names.set(C_("Search operator", "from"), SEARCH_OP_FROM);
- // Can be typed in the search box like "is:unread" to find
- // messages that are read, unread, or starred.
- //
- // The translated string must be a single word (use '-', '_'
- // or similar to combine words into one), should be short, and
- // also match the translation in "search.page" of the Geary User
- // Guide.
- search_op_names.set(C_("Search operator", "is"), SEARCH_OP_IS);
- // Can be typed in the search box like "subject:word" to find
- // "word" only if it occurs in the subject of a message.
- //
- // The translated string must be a single word (use '-', '_'
- // or similar to combine words into one), should be short, and
- // also match the translation in "search.page" of the Geary
- // User Guide.
- search_op_names.set(C_("Search operator", "subject"), SEARCH_OP_SUBJECT);
- // Can be typed in the search box like
- // "to:johndoe example com" to find messages received by a
- // particular person.
- //
- // The translated string must be a single word (use '-', '_'
- // or similar to combine words into one), should be short, and
- // also match the translation in "search.page" of the Geary User
- // Guide.
- search_op_names.set(C_("Search operator", "to"), SEARCH_OP_TO);
-
- // And the English language versions
- search_op_names.set("attachment", SEARCH_OP_ATTACHMENT);
- search_op_names.set("bcc", SEARCH_OP_BCC);
- search_op_names.set("body", SEARCH_OP_BODY);
- search_op_names.set("cc", SEARCH_OP_CC);
- search_op_names.set("from", SEARCH_OP_FROM);
- search_op_names.set("is", SEARCH_OP_IS);
- search_op_names.set("subject", SEARCH_OP_SUBJECT);
- search_op_names.set("to", SEARCH_OP_TO);
-
- // Can be typed in the search box after "to:", "cc:" and
- // "bcc:" e.g.: "to:me". Matches conversations that are
- // addressed to the user.
- //
- // The translated string must be a single word (use '-', '_'
- // or similar to combine words into one), should be short, and
- // also match the translation in "search.page" of the Geary User
- // Guide.
- search_op_to_me_values.add(
- C_("Search operator value - mail addressed to the user", "me")
- );
- search_op_to_me_values.add(SEARCH_OP_ADDRESSABLE_VALUE_ME);
-
- // Can be typed in the search box after "from:" i.e.:
- // "from:me". Matches conversations were sent by the user.
- //
- // The translated string must be a single word (use '-', '_'
- // or similar to combine words into one), should be short, and
- // also match the translation in "search.page" of the Geary User
- // Guide.
- search_op_from_me_values.add(
- C_("Search operator value - mail sent by the user", "me")
- );
- search_op_from_me_values.add(SEARCH_OP_ADDRESSABLE_VALUE_ME);
-
- // Can be typed in the search box after "is:" i.e.:
- // "is:read". Matches conversations that are flagged as read.
- //
- // The translated string must be a single word (use '-', '_'
- // or similar to combine words into one), should be short, and
- // also match the translation in "search.page" of the Geary User
- // Guide.
- search_op_is_values.set(
- C_("'is:' search operator value", "read"), SEARCH_OP_VALUE_READ
- );
- // Can be typed in the search box after "is:" i.e.:
- // "is:starred". Matches conversations that are flagged as
- // starred.
- //
- // The translated string must be a single word (use '-', '_'
- // or similar to combine words into one), should be short, and
- // also match the translation in "search.page" of the Geary User
- // Guide.
- search_op_is_values.set(
- C_("'is:' search operator value", "starred"), SEARCH_OP_VALUE_STARRED
- );
- // Can be typed in the search box after "is:" i.e.:
- // "is:unread". Matches conversations that are flagged unread.
- //
- // The translated string must be a single word (use '-', '_'
- // or similar to combine words into one), should be short, and
- // also match the translation in "search.page" of the Geary User
- // Guide.
- search_op_is_values.set(
- C_("'is:' search operator value", "unread"), SEARCH_OP_VALUE_UNREAD
- );
- search_op_is_values.set(SEARCH_OP_VALUE_READ, SEARCH_OP_VALUE_READ);
- search_op_is_values.set(SEARCH_OP_VALUE_STARRED, SEARCH_OP_VALUE_STARRED);
- search_op_is_values.set(SEARCH_OP_VALUE_UNREAD, SEARCH_OP_VALUE_UNREAD);
- }
-
-
- /**
- * Associated {@link ImapDB.Account}.
- */
- public weak ImapDB.Account account { get; private set; }
-
- /**
- * Returns whether stemming may be used when exerting the search.
- *
- * Determined by {@link Geary.SearchQuery.Strategy} passed to the
- * constructor.
- */
- public bool allow_stemming { get; private set; }
-
- /**
- * Minimum length of the term before stemming is allowed.
- *
- * This prevents short words that might be stemmed from being stemmed.
- *
- * Overridden by {@link allow_stemming}. Determined by the {@link
- * Geary.SearchQuery.Strategy} passed to the constructor.
- */
- public int min_term_length_for_stemming { get; private set; }
-
-
- /**
- * Maximum difference in lengths between term and stemmed variant.
- *
- * This prevents long words from being stemmed to much shorter
- * words (which creates opportunities for greedy matching).
- *
- * Overridden by {@link allow_stemming}. Determined by the {@link
- * Geary.SearchQuery.Strategy} passed to the constructor.
- */
- public int max_difference_term_stem_lengths { get; private set; }
-
- /**
- * Maximum difference in lengths between a matched word and the stemmed variant it matched
- * against.
- *
- * This prevents long words being matched to short stem variants (which creates opportunities
- * for greedy matching).
- *
- * Overridden by {@link allow_stemming}. Determined by the {@link
- * Geary.SearchQuery.Strategy} passed to the constructor.
- */
- public int max_difference_match_stem_lengths { get; private set; }
-
- // Maps search operator field names such as "to", "cc", "is" to
- // their search term values. Note that terms without an operator
- // are stored with null as the key. Not using a MultiMap because
- // we (might) need a guarantee of order.
- private Gee.HashMap<string?, Gee.ArrayList<Term>> field_map
- = new Gee.HashMap<string?, Gee.ArrayList<Term>>();
-
- // A list of all search terms, regardless of search op field name
- private Gee.ArrayList<Term> all = new Gee.ArrayList<Term>();
+ internal bool has_stemmed_terms { get; private set; default = false; }
private unowned SnowBall.Stemmer stemmer;
- public async SearchQuery(Geary.Account owner,
- ImapDB.Account local,
- Gee.Collection<Geary.SearchQuery.Term> expression,
- string raw,
- SnowBall.Stemmer stemmer,
- Geary.SearchQuery.Strategy strategy,
- GLib.Cancellable? cancellable) {
+ public SearchQuery(Gee.List<Term> expression,
+ string raw,
+ SnowBall.Stemmer stemmer) {
base(expression, raw);
- this.account = local;
this.stemmer = stemmer;
- switch (strategy) {
- case Strategy.EXACT:
- allow_stemming = false;
- min_term_length_for_stemming = int.MAX;
- max_difference_term_stem_lengths = 0;
- max_difference_match_stem_lengths = 0;
- break;
-
- case Strategy.CONSERVATIVE:
- allow_stemming = true;
- min_term_length_for_stemming = 6;
- max_difference_term_stem_lengths = 2;
- max_difference_match_stem_lengths = 2;
- break;
-
- case Strategy.AGGRESSIVE:
- allow_stemming = true;
- min_term_length_for_stemming = 4;
- max_difference_term_stem_lengths = 4;
- max_difference_match_stem_lengths = 3;
- break;
-
- case Strategy.HORIZON:
- allow_stemming = true;
- min_term_length_for_stemming = 0;
- max_difference_term_stem_lengths = int.MAX;
- max_difference_match_stem_lengths = int.MAX;
- break;
- }
-
- yield prepare(cancellable);
- }
-
- public Gee.Collection<string?> get_fields() {
- return field_map.keys;
- }
-
- public Gee.List<Term>? get_search_terms(string? field) {
- return field_map.has_key(field) ? field_map.get(field) : null;
- }
-
- public Gee.List<Term>? get_all_terms() {
- return all;
- }
-
- // For some searches, results are stripped if they're too
- // "greedy", but this requires examining the matched text, which
- // has an expense to fetch, so avoid doing so unless necessary
- internal bool should_strip_greedy_results() {
- // HORIZON strategy is configured in such a way to allow all
- // stemmed variants to match, so don't do any stripping in
- // that case
- //
- // If any of the search terms is exact-match (no prefix
- // matching) or none have stemmed variants, then don't do
- // stripping of "greedy" stemmed matching (because in both
- // cases, there are none)
-
- bool strip_results = true;
- if (this.strategy == Geary.SearchQuery.Strategy.HORIZON)
- strip_results = false;
- else if (traverse<Term>(this.all).any(
- term => term.stemmed == null || term.is_exact)) {
- strip_results = false;
- }
- return strip_results;
- }
-
- internal Gee.Map<Geary.NamedFlag,bool> get_removal_conditions() {
- Gee.Map<Geary.NamedFlag,bool> conditions =
- new Gee.HashMap<Geary.NamedFlag,bool>();
- foreach (string? field in this.field_map.keys) {
- if (field == SEARCH_OP_IS) {
- Gee.List<Term>? terms = get_search_terms(field);
- foreach (Term term in terms)
- if (term.parsed == SEARCH_OP_VALUE_READ)
- conditions.set(new NamedFlag("UNREAD"), true);
- else if (term.parsed == SEARCH_OP_VALUE_UNREAD)
- conditions.set(new NamedFlag("UNREAD"), false);
- else if (term.parsed == SEARCH_OP_VALUE_STARRED)
- conditions.set(new NamedFlag("FLAGGED"), false);
+ // Pre-stem search terms up front since the stemmed form is
+ // needed in a few different places
+ foreach (var term in this.expression) {
+ // Use this brittle form of type checking for performance
+ // (both here and further below in the class) - the Engine
+ // controls the Term hierarchy the needed assumptions can
+ // be made
+ if (term.get_type() == typeof(EmailTextTerm)) {
+ var text = (EmailTextTerm) term;
+ if (text.matching_strategy.is_stemming_enabled()) {
+ stem_search_terms(text);
+ }
}
}
- return conditions;
}
- // Return a map of column -> phrase, to use as WHERE column MATCH 'phrase'.
- internal Gee.HashMap<string, string> get_query_phrases() {
- Gee.HashMap<string, string> phrases = new Gee.HashMap<string, string>();
- foreach (string? field in field_map.keys) {
- Gee.List<Term>? terms = get_search_terms(field);
- if (terms == null || terms.size == 0 || field == "is")
- continue;
-
- // Each Term is an AND but the SQL text within in are OR ... this allows for
- // each user term to be AND but the variants of each term are or. So, if terms are
- // [party] and [eventful] and stems are [parti] and [event], the search would be:
- //
- // (party* OR parti*) AND (eventful* OR event*)
- //
- // Obviously with stemming there's the possibility of the stemmed variant being nothing
- // but a broader search of the original term (such as event* and eventful*) but do both
- // to determine from each hit result which term caused the hit, and if it's too greedy
- // a match of the stemmed variant, it can be stripped from the results.
- //
- // Note that this uses SQLite's "standard" query syntax for MATCH, where AND is implied
- // (and would be treated as search term if included), parentheses are not allowed, and
- // OR has a higher precedence than AND. So the above example in standard syntax is:
- //
- // party* OR parti* eventful* OR event*
- StringBuilder builder = new StringBuilder();
- foreach (Term term in terms) {
- if (term.sql.size == 0)
- continue;
-
- if (term.is_exact) {
- builder.append_printf("%s ", term.parsed);
- } else {
- bool is_first_sql = true;
- foreach (string sql in term.sql) {
- if (!is_first_sql)
- builder.append(" OR ");
-
- builder.append_printf("%s ", sql);
- is_first_sql = false;
- }
- }
+ internal Db.Statement get_search_query(
+ Db.Connection cx,
+ string? search_ids_sql,
+ Gee.Collection<Geary.FolderPath>? folder_blacklist,
+ int limit,
+ int offset,
+ GLib.Cancellable? cancellable
+ ) throws GLib.Error {
+ var sql = new GLib.StringBuilder();
+ var conditions_added = false;
+
+ sql.append("""
+ SELECT mst.rowid
+ FROM MessageSearchTable as mst
+ INNER JOIN MessageTable AS mt ON mt.id = mst.rowid
+ WHERE""");
+ conditions_added = sql_add_term_conditions(sql, conditions_added);
+ if (!String.is_empty(search_ids_sql)) {
+ if (conditions_added) {
+ sql.append(" AND");
}
-
- phrases.set(field ?? "MessageSearchTable", builder.str);
+ sql.append(""" id IN (%s)""".printf(search_ids_sql));
}
-
- return phrases;
- }
-
- private async void prepare(GLib.Cancellable? cancellable) {
- // A few goals here:
- // 1) Append an * after every term so it becomes a prefix search
- // (see <https://www.sqlite.org/fts3.html#section_3>)
- // 2) Strip out common words/operators that might get interpreted as
- // search operators
- // 3) Parse each word into a list of which field it applies to, so
- // you can do "to:johndoe example com thing" (quotes excluded)
- // to find messages to John containing the word thing
- // We ignore everything inside quotes to give the user a way to
- // override our algorithm here. The idea is to offer one search query
- // syntax for Geary that we can use locally and via IMAP, etc.
-
- string quote_balanced = this.raw;
- if (Geary.String.count_char(this.raw, '"') % 2 != 0) {
- // Remove the last quote if it's not balanced. This has the
- // benefit of showing decent results as you type a quoted phrase.
- int last_quote = this.raw.last_index_of_char('"');
- assert(last_quote >= 0);
- quote_balanced = this.raw.splice(last_quote, last_quote + 1, " ");
+ sql.append("""
+ ORDER BY mt.internaldate_time_t DESC""");
+ if (limit > 0) {
+ sql.append("""
+ LIMIT ? OFFSET ?""");
}
- string[] words = quote_balanced.split_set(" \t\r\n()%*\\");
- bool in_quote = false;
- foreach (string s in words) {
- string? field = null;
-
- s = s.strip();
-
- int quotes = Geary.String.count_char(s, '"');
- if (!in_quote && quotes > 0) {
- in_quote = true;
- --quotes;
- }
-
- Term? term;
- if (in_quote) {
- // HACK: this helps prevent a syntax error when the user types
- // something like from:"somebody". If we ever properly support
- // quotes after : we can get rid of this.
- term = new Term(s, s, null, s.replace(":", " "), null);
- } else {
- string original = s;
-
- // Some common search phrases we don't respect and
- // therefore don't want to fall through to search
- // results
- // XXX translate these
- string lower = s.down();
- switch (lower) {
- case "":
- case "and":
- case "or":
- case "not":
- case "near":
- continue;
-
- default:
- if (lower.has_prefix("near/"))
- continue;
- break;
- }
-
- if (s.has_prefix("-"))
- s = s.substring(1);
-
- if (s == "")
- continue;
-
- // TODO: support quotes after :
- string[] parts = s.split(":", 2);
- if (parts.length > 1)
- field = extract_field_from_token(parts, ref s);
-
- if (field == SEARCH_OP_IS) {
- // s will have been de-translated
- term = new Term(original, s, null, null, null);
- } else {
- // SQL MATCH syntax for parsed term
- string? sql_s = "%s*".printf(s);
-
- // stem the word, but if stemmed and stem is
- // simply shorter version of original term, only
- // prefix-match search for it (i.e. avoid
- // searching for [archive* OR archiv*] when that's
- // the same as [archiv*]), otherwise search for
- // both
- string? stemmed = yield stem_search_term(s, cancellable);
-
- string? sql_stemmed = null;
- if (stemmed != null) {
- sql_stemmed = "%s*".printf(stemmed);
- if (s.has_prefix(stemmed))
- sql_s = null;
- }
-
- // if term contains continuation characters, treat
- // as exact search to reduce effects of tokenizer
- // splitting terms w/ punctuation in them
- if (String.contains_any_char(s, SEARCH_TERM_CONTINUATION_CHARS))
- s = "\"%s\"".printf(s);
-
- term = new Term(original, s, stemmed, sql_s, sql_stemmed);
- }
- }
-
- if (in_quote && quotes % 2 != 0)
- in_quote = false;
-
- // Finally, add the term
- if (!this.field_map.has_key(field)) {
- this.field_map.set(field, new Gee.ArrayList<Term>());
- }
- this.field_map.get(field).add(term);
- this.all.add(term);
+ Db.Statement stmt = cx.prepare(sql.str);
+ int bind_index = sql_bind_term_conditions(stmt, false, 0);
+ if (limit > 0) {
+ stmt.bind_int(bind_index++, limit);
+ stmt.bind_int(bind_index++, offset);
}
+
+ return stmt;
}
- private string? extract_field_from_token(string[] parts, ref string token) {
- string? field = null;
- if (Geary.String.is_empty_or_whitespace(parts[1])) {
- // User stopped at "field:", treat it as if they hadn't
- // typed the ':'
- token = parts[0];
- } else {
- field = search_op_names.get(parts[0].down());
- if (field == SEARCH_OP_IS) {
- string? value = search_op_is_values.get(parts[1].down());
- if (value != null) {
- token = value;
- } else {
- // Unknown op value, pretend there is no search op
- field = null;
- }
- } else if (field == SEARCH_OP_FROM &&
- parts[1].down() in search_op_from_me_values) {
- // Search for all addresses on the account. Bug 768779
- token = this.account.account_information.primary_mailbox.address;
- } else if (field in SEARCH_OP_TO_ME_FIELDS &&
- parts[1].down() in search_op_to_me_values) {
- // Search for all addresses on the account. Bug 768779
- token = this.account.account_information.primary_mailbox.address;
- } else if (field != null) {
- token = parts[1];
- }
- }
- return field;
+ internal Db.Statement get_match_query(
+ Db.Connection cx,
+ string? search_ids_sql
+ ) throws GLib.Error {
+ var sql = new GLib.StringBuilder();
+ sql.append("""
+ SELECT mst.rowid, geary_matches(MessageSearchTable)
+ FROM MessageSearchTable as mst
+ WHERE rowid IN (
+ """);
+ sql.append(search_ids_sql);
+ sql.append(") AND ");
+ sql_add_term_conditions(sql, false);
+
+ Db.Statement stmt = cx.prepare(sql.str);
+ sql_bind_term_conditions(stmt, true, 0);
+ return stmt;
}
/**
- * Converts unquoted search terms into a stemmed search term.
+ * Applies stemming for the given term to a specific term value.
*
* Prior experience with the Snowball stemmer indicates it is too
* aggressive for our tastes when coupled with prefix-matching of
- * all unquoted terms (see
- * https://bugzilla.gnome.org/show_bug.cgi?id=713179).
+ * all unquoted terms. See
+ * https://bugzilla.gnome.org/show_bug.cgi?id=713179 and
+ * https://bugzilla.gnome.org/show_bug.cgi?id=720361
*
* This method is part of a larger strategy designed to dampen
* that aggressiveness without losing the benefits of stemming
@@ -632,47 +117,152 @@ private class Geary.ImapDB.SearchQuery : Geary.SearchQuery {
* Post-search processing is then to strip results which are too
* "greedy" due to prefix-matching the stemmed variant.
*
- * Some heuristics are in place simply to determine if stemming should occur:
+ * Some heuristics are in place simply to determine if stemming
+ * should occur:
*
* # If stemming is unallowed, no stemming occurs.
- * # If the term is < min. term length for stemming, no stemming occurs.
- * # If the stemmer returns a stem that is the same as the original term, no stemming occurs.
- * # If the difference between the stemmed word and the original term is more than
- * maximum allowed, no stemming occurs. This works under the assumption that if
- * the user has typed a long word, they do not want to "go back" to searching for a much
- * shorter version of it. (For example, "accountancies" stems to "account").
+ * # If the term is < min. term length for stemming, no stemming
+ * occurs.
+ * # If the stemmer returns a stem that is the same as the
+ * original term, no stemming occurs.
+ * # If the difference between the stemmed word and the original
+ * term is more than maximum allowed, no stemming occurs. This
+ * works under the assumption that if the user has typed a long
+ * word, they do not want to "go back" to searching for a much
+ * shorter version of it. (For example, "accountancies" stems
+ * to "account").
*
* Otherwise, the stem for the term is returned.
*/
- private async string? stem_search_term(string term,
- GLib.Cancellable? cancellable) {
- if (!this.allow_stemming)
- return null;
+ private void stem_search_terms(EmailTextTerm text) {
+ var stemmed_terms = new Gee.ArrayList<string?>();
+ foreach (var term in text.terms) {
+ int term_length = term.length;
+ string? stemmed = null;
+ if (term_length > text.matching_strategy.get_min_term_length_for_stemming()) {
+ stemmed = this.stemmer.stem(term, term_length);
+ if (String.is_empty(stemmed) ||
+ term == stemmed ||
+ term_length - stemmed.length >
+ text.matching_strategy.get_max_difference_term_stem_lengths()) {
+ stemmed = null;
+ }
+ }
+ if (stemmed != null) {
+ this.has_stemmed_terms = true;
+ debug(@"Search term \"$term\" stemmed to \"$stemmed\"");
+ } else {
+ debug(@"Search term \"$term\" not stemmed");
+ }
+ stemmed_terms.add(stemmed);
+ }
+ text.set_data(EMAIL_TEXT_STEMMED_TERMS, stemmed_terms);
+ }
- int term_length = term.length;
- if (term_length < this.min_term_length_for_stemming)
- return null;
+ private bool sql_add_term_conditions(GLib.StringBuilder sql,
+ bool have_added_sql_condition) {
+ if (!this.expression.is_empty) {
+ if (have_added_sql_condition) {
+ sql.append(" AND");
+ }
+ have_added_sql_condition = true;
+ var is_first_match_term = true;
+ sql.append(" MessageSearchTable MATCH '");
+ foreach (var term in this.expression) {
+ if (!is_first_match_term) {
+ sql.append(" AND");
+ }
- string? stemmed = this.stemmer.stem(term, term.length);
- if (String.is_empty(stemmed)) {
- debug("Empty stemmed term returned for \"%s\"", term);
- return null;
- }
+ if (term.is_negated) {
+ sql.append(" NOT");
+ }
- // If same term returned, treat as non-stemmed
- if (stemmed == term)
- return null;
+ if (term.get_type() == typeof(EmailTextTerm)) {
+ sql_add_email_text_term_conditions((EmailTextTerm) term, sql);
+ } else if (term.get_type() == typeof(EmailFlagTerm)) {
+ sql.append(" ({flags} : \"' || ? || '\")");
+ }
- // Don't search for stemmed words that are significantly shorter than the user's search term
- if (term_length - stemmed.length > this.max_difference_term_stem_lengths) {
- debug("Stemmed \"%s\" dropped searching for \"%s\": too much distance in terms",
- stemmed, term);
+ is_first_match_term = false;
+ }
+ sql.append("'");
+ }
+ return have_added_sql_condition;
+ }
- return null;
+ private void sql_add_email_text_term_conditions(EmailTextTerm text,
+ GLib.StringBuilder sql) {
+ var target = "";
+ switch (text.target) {
+ case ALL:
+ target = "";
+ break;
+ case TO:
+ target = "receivers";
+ break;
+ case CC:
+ target = "cc";
+ break;
+ case BCC:
+ target = "bcc";
+ break;
+ case FROM:
+ target = "from";
+ break;
+ case SUBJECT:
+ target = "subject";
+ break;
+ case BODY:
+ target = "body";
+ break;
+ case ATTACHMENT_NAME:
+ target = "attachments";
+ break;
+ }
+
+ var values = text.terms;
+ var stemmed_values = text.get_data<Gee.List<string?>>(
+ EMAIL_TEXT_STEMMED_TERMS
+ );
+ for (int i = 0; i < values.size; i++) {
+ if (target != "") {
+ sql.append_printf(" ({%s} :", target);
+ }
+ if (stemmed_values != null && stemmed_values[i] != null) {
+ sql.append(" \"' || ? || '\"* OR \"' || ? || '\"*");
+ } else {
+ sql.append(" \"' || ? || '\"*");
+ }
+ if (target != "") {
+ sql.append_c(')');
+ }
}
+ }
- debug("Search processing: term -> stem is \"%s\" -> \"%s\"", term, stemmed);
- return stemmed;
+ private int sql_bind_term_conditions(Db.Statement sql,
+ bool text_only,
+ int index)
+ throws Geary.DatabaseError {
+ int next_index = index;
+ foreach (var term in this.expression) {
+ var type = term.get_type();
+ if (type == typeof(EmailTextTerm)) {
+ var text = (EmailTextTerm) term;
+ var stemmed_terms = text.get_data<Gee.List<string?>>(
+ EMAIL_TEXT_STEMMED_TERMS
+ );
+ for (int i = 0; i < text.terms.size; i++) {
+ sql.bind_string(next_index++, text.terms[i]);
+ if (stemmed_terms != null && stemmed_terms[i] != null) {
+ sql.bind_string(next_index++, stemmed_terms[i]);
+ }
+ }
+ } else if (type == typeof(EmailFlagTerm)) {
+ var flag = (EmailFlagTerm) term;
+ sql.bind_string(next_index++, flag.value.serialise());
+ }
+ }
+ return next_index;
}
}
diff --git a/src/engine/imap-engine/imap-engine-generic-account.vala
b/src/engine/imap-engine/imap-engine-generic-account.vala
index 4282ca046..fb5ab4d9b 100644
--- a/src/engine/imap-engine/imap-engine-generic-account.vala
+++ b/src/engine/imap-engine/imap-engine-generic-account.vala
@@ -580,9 +580,7 @@ private abstract class Geary.ImapEngine.GenericAccount : Geary.Account {
string text,
GLib.Cancellable? cancellable
) throws GLib.Error {
- return yield new ImapDB.SearchQuery(
- this, this.local, expression, text, this.stemmer, EXACT, cancellable
- );
+ return new ImapDB.SearchQuery(expression, text, this.stemmer);
}
public override async Gee.MultiMap<Geary.Email, Geary.FolderPath?>? local_search_message_id_async(
diff --git a/test/engine/imap-db/imap-db-search-query-test.vala
b/test/engine/imap-db/imap-db-search-query-test.vala
new file mode 100644
index 000000000..510402611
--- /dev/null
+++ b/test/engine/imap-db/imap-db-search-query-test.vala
@@ -0,0 +1,199 @@
+/*
+ * Copyright © 2020 Michael Gratton <mike vee net>
+ *
+ * This software is licensed under the GNU Lesser General Public License
+ * (version 2.1 or later). See the COPYING file in this distribution.
+ */
+
+public class Geary.ImapDB.SearchQueryTest : TestCase {
+
+
+ private GLib.File? tmp_dir = null;
+ private Geary.AccountInformation? config = null;
+ private Account? account = null;
+ private SnowBall.Stemmer? stemmer = null;
+
+
+ public SearchQueryTest() {
+ base("Geary.ImapDB.SearchQueryTest");
+ add_test("email_text_terms", email_text_terms);
+ add_test("email_text_terms_stemmed", email_text_terms_stemmed);
+ add_test("email_text_terms_specific", email_text_terms_specific);
+ add_test("email_flag_terms", email_flag_terms);
+ }
+
+ public override void set_up() throws GLib.Error {
+ this.tmp_dir = GLib.File.new_for_path(
+ GLib.DirUtils.make_tmp("geary-imap-db-search-query-test-XXXXXX")
+ );
+
+ this.config = new Geary.AccountInformation(
+ "test",
+ ServiceProvider.OTHER,
+ new Mock.CredentialsMediator(),
+ new Geary.RFC822.MailboxAddress(null, "test example com")
+ );
+
+ this.account = new Account(
+ config,
+ this.tmp_dir,
+ GLib.File.new_for_path(_SOURCE_ROOT_DIR).get_child("sql")
+ );
+ this.account.open_async.begin(
+ null,
+ this.async_completion
+ );
+ this.account.open_async.end(async_result());
+
+ this.stemmer = new SnowBall.Stemmer("english");
+
+ Db.Context.enable_sql_logging = true;
+ }
+
+ public override void tear_down() throws GLib.Error {
+ Db.Context.enable_sql_logging = false;
+
+ this.stemmer = null;
+
+ this.account.close_async.begin(
+ null,
+ this.async_completion
+ );
+ this.account.close_async.end(async_result());
+ this.account = null;
+ this.config = null;
+
+ delete_file(this.tmp_dir);
+ this.tmp_dir = null;
+ }
+
+ public void email_text_terms() throws GLib.Error {
+ var single_all_term = new_search_query(
+ { new Geary.SearchQuery.EmailTextTerm(ALL, EXACT, "test")},
+ "test"
+ );
+ assert_queries(single_all_term);
+
+ var multiple_all_term = new_search_query(
+ {
+ new Geary.SearchQuery.EmailTextTerm(ALL, EXACT, "foo"),
+ new Geary.SearchQuery.EmailTextTerm(ALL, EXACT, "bar")
+ },
+ "foo bar"
+ );
+ assert_queries(multiple_all_term);
+
+ var all_to_term = new_search_query(
+ {
+ new Geary.SearchQuery.EmailTextTerm(ALL, EXACT, "foo"),
+ new Geary.SearchQuery.EmailTextTerm(TO, EXACT, "bar")
+ },
+ "foo to:bar"
+ );
+ assert_queries(all_to_term);
+
+ var all_to_all_term = new_search_query(
+ {
+ new Geary.SearchQuery.EmailTextTerm(ALL, EXACT, "foo"),
+ new Geary.SearchQuery.EmailTextTerm(TO, EXACT, "bar"),
+ new Geary.SearchQuery.EmailTextTerm(ALL, EXACT, "baz")
+ },
+ "foo to:bar baz"
+ );
+ assert_queries(all_to_all_term);
+ }
+
+ public void email_text_terms_stemmed() throws GLib.Error {
+ var single_all_term = new_search_query(
+ { new Geary.SearchQuery.EmailTextTerm(ALL, CONSERVATIVE, "universal")},
+ "universal"
+ );
+ assert_queries(single_all_term);
+
+ var multiple_all_term = new_search_query(
+ {
+ new Geary.SearchQuery.EmailTextTerm(ALL, CONSERVATIVE, "universal"),
+ new Geary.SearchQuery.EmailTextTerm(ALL, EXACT, "bar")
+ },
+ "universal bar"
+ );
+ assert_queries(multiple_all_term);
+
+ var all_to_term = new_search_query(
+ {
+ new Geary.SearchQuery.EmailTextTerm(ALL, CONSERVATIVE, "universal"),
+ new Geary.SearchQuery.EmailTextTerm(TO, EXACT, "bar")
+ },
+ "universal to:bar"
+ );
+ assert_queries(all_to_term);
+ }
+
+ public void email_text_terms_specific() throws GLib.Error {
+ var single_term = new_search_query(
+ { new Geary.SearchQuery.EmailTextTerm(SUBJECT, EXACT, "test")},
+ "subject:test"
+ );
+ assert_queries(single_term);
+
+ var missing_term = new_search_query(
+ { new Geary.SearchQuery.EmailTextTerm(SUBJECT, EXACT, "")},
+ "subject:"
+ );
+ assert_queries(missing_term);
+
+ var conflicting_property = new_search_query(
+ { new Geary.SearchQuery.EmailTextTerm(ALL, EXACT, "subject:")},
+ "subject:"
+ );
+ assert_queries(conflicting_property);
+
+ var conflicting_property_and_term = new_search_query(
+ { new Geary.SearchQuery.EmailTextTerm(SUBJECT, EXACT, "subject:")},
+ "subject:subject:"
+ );
+ assert_queries(conflicting_property_and_term);
+ }
+
+ public void email_flag_terms() throws GLib.Error {
+ var unread = new_search_query(
+ { new Geary.SearchQuery.EmailFlagTerm(Geary.EmailFlags.UNREAD)},
+ "is:unread"
+ );
+ assert_queries(unread);
+
+ var flagged = new_search_query(
+ { new Geary.SearchQuery.EmailFlagTerm(Geary.EmailFlags.FLAGGED)},
+ "is:flagged"
+ );
+ assert_queries(flagged);
+ }
+
+ private SearchQuery new_search_query(Geary.SearchQuery.Term[] ops, string raw)
+ throws GLib.Error {
+ return new SearchQuery(
+ new Gee.ArrayList<Geary.SearchQuery.Term>.wrap(ops),
+ raw,
+ this.stemmer
+ );
+ }
+
+ private void assert_queries(SearchQuery query) throws GLib.Error {
+ var search = query.get_search_query(
+ this.account.db.get_primary_connection(),
+ null,
+ null,
+ 0,
+ 10,
+ null
+ );
+ search.exec(null);
+
+ var match = query.get_match_query(
+ this.account.db.get_primary_connection(),
+ ""
+ );
+ match.exec(null);
+ }
+
+}
diff --git a/test/meson.build b/test/meson.build
index a50f12a6d..d4359f64b 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -53,6 +53,7 @@ test_engine_sources = [
'engine/imap-db/imap-db-database-test.vala',
'engine/imap-db/imap-db-email-identifier-test.vala',
'engine/imap-db/imap-db-folder-test.vala',
+ 'engine/imap-db/imap-db-search-query-test.vala',
'engine/imap-engine/account-processor-test.vala',
'engine/imap-engine/imap-engine-generic-account-test.vala',
'engine/mime/mime-content-type-test.vala',
diff --git a/test/test-engine.vala b/test/test-engine.vala
index 0260ef6a8..a86ee94c1 100644
--- a/test/test-engine.vala
+++ b/test/test-engine.vala
@@ -72,6 +72,7 @@ int main(string[] args) {
engine.add_suite(new Geary.ImapDB.DatabaseTest().suite);
engine.add_suite(new Geary.ImapDB.EmailIdentifierTest().suite);
engine.add_suite(new Geary.ImapDB.FolderTest().suite);
+ engine.add_suite(new Geary.ImapDB.SearchQueryTest().suite);
engine.add_suite(new Geary.ImapEngine.AccountProcessorTest().suite);
engine.add_suite(new Geary.ImapEngine.GenericAccountTest().suite);
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]