[gnome-software/1672-gnome-software-show-details-does-not-open-its-own-details-page: 23/23] gs-appstream: Tokenize search term and prioritize full matches
- From: Milan Crha <mcrha src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gnome-software/1672-gnome-software-show-details-does-not-open-its-own-details-page: 23/23] gs-appstream: Tokenize search term and prioritize full matches
- Date: Fri, 11 Mar 2022 09:29:51 +0000 (UTC)
commit 4bfd9fc68b84c0ad22c9cd3bbb31902236ecf85f
Author: Milan Crha <mcrha redhat com>
Date: Tue Mar 8 14:28:14 2022 +0100
gs-appstream: Tokenize search term and prioritize full matches
When the search consists of a single term, also tokenize it and search
the apps using both the original term and the tokens, prioritizing
matches on the original term.
Closes https://gitlab.gnome.org/GNOME/gnome-software/-/issues/1672
lib/gs-appstream.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 52 insertions(+), 8 deletions(-)
---
diff --git a/lib/gs-appstream.c b/lib/gs-appstream.c
index 14cf8887f..735076e24 100644
--- a/lib/gs-appstream.c
+++ b/lib/gs-appstream.c
@@ -1328,15 +1328,28 @@ gs_appstream_silo_search_component2 (GPtrArray *array, XbNode *component, const
}
static guint16
-gs_appstream_silo_search_component (GPtrArray *array, XbNode *component, const gchar * const *search)
+gs_appstream_silo_search_component (GPtrArray *array,
+ XbNode *component,
+ const gchar * const *search,
+ gboolean values_tokenized)
{
guint16 matches_sum = 0;
/* do *all* search keywords match */
for (guint i = 0; search[i] != NULL; i++) {
guint tmp = gs_appstream_silo_search_component2 (array, component, search[i]);
- if (tmp == 0)
+ if (tmp == 0 && (!values_tokenized || i != 0))
return 0;
+ if (tmp == 0)
+ continue;
+
+ /* Shift the result, to be able to mark matches on the first token */
+ tmp = tmp << 1;
+
+ /* The first token is the full match as entered by the user; tag the match sum as such, to prioritize this one */
+ if (i == 0 && values_tokenized)
+ tmp = tmp | 0x1;
+
matches_sum |= tmp;
}
return matches_sum;
@@ -1353,7 +1366,9 @@ gs_appstream_search (GsPlugin *plugin,
g_autoptr(GError) error_local = NULL;
g_autoptr(GPtrArray) array = g_ptr_array_new_with_free_func ((GDestroyNotify)
gs_appstream_search_helper_free);
g_autoptr(GPtrArray) components = NULL;
+ g_autoptr(GPtrArray) search_tokens = NULL;
g_autoptr(GTimer) timer = g_timer_new ();
+ gboolean values_tokenized = FALSE;
const struct {
AsSearchTokenMatch match_value;
const gchar *xpath;
@@ -1369,6 +1384,31 @@ gs_appstream_search (GsPlugin *plugin,
{ AS_SEARCH_TOKEN_MATCH_NONE, NULL }
};
+ /* Also tokenize the search term, if it's only one */
+ if (values[0] != NULL && values[1] == NULL) {
+ g_autoptr(AsPool) as_pool = as_pool_new ();
+ g_auto(GStrv) tokens = as_pool_build_search_tokens (as_pool, values[0]);
+
+ if (tokens == NULL) {
+ g_set_error (error, GS_PLUGIN_ERROR,
+ GS_PLUGIN_ERROR_NOT_SUPPORTED,
+ "failed to tokenize '%s'", values[0]);
+ return FALSE;
+ }
+
+ /* There is at least one token, which can be case-folded or similarly changed */
+ if (tokens != NULL && tokens[0] != NULL && (tokens[1] != NULL || g_ascii_strcasecmp
(tokens[0], values[0]) != 0)) {
+ search_tokens = g_ptr_array_new_with_free_func (g_free);
+ g_ptr_array_add (search_tokens, g_strdup (values[0]));
+ for (guint i = 0; tokens[i]; i++) {
+ g_ptr_array_add (search_tokens, g_strdup (tokens[i]));
+ }
+ g_ptr_array_add (search_tokens, NULL);
+ values = (const gchar * const *) search_tokens->pdata;
+ values_tokenized = TRUE;
+ }
+ }
+
/* add some weighted queries */
for (guint i = 0; queries[i].xpath != NULL; i++) {
g_autoptr(GError) error_query = NULL;
@@ -1395,7 +1435,7 @@ gs_appstream_search (GsPlugin *plugin,
}
for (guint i = 0; i < components->len; i++) {
XbNode *component = g_ptr_array_index (components, i);
- guint16 match_value = gs_appstream_silo_search_component (array, component, values);
+ guint16 match_value = gs_appstream_silo_search_component (array, component, values,
values_tokenized);
if (match_value != 0) {
g_autoptr(GsApp) app = gs_appstream_create_app (plugin, silo, component, error);
if (app == NULL)
@@ -1407,11 +1447,15 @@ gs_appstream_search (GsPlugin *plugin,
}
g_debug ("add %s", gs_app_get_unique_id (app));
- /* The match value is used for prioritising results.
- * Drop the ID token from it as it’s the highest
- * numeric value but isn’t visible to the user in the
- * UI, which leads to confusing results ordering. */
- gs_app_set_match_value (app, match_value & (~AS_SEARCH_TOKEN_MATCH_ID));
+ if (!values_tokenized || ((match_value & 1) == 0)) {
+ /* The match value is used for prioritising results.
+ * Drop the ID token from it as it’s the highest
+ * numeric value but isn’t visible to the user in the
+ * UI, which leads to confusing results ordering. */
+ match_value = match_value & (~(AS_SEARCH_TOKEN_MATCH_ID << 1));
+ }
+
+ gs_app_set_match_value (app, match_value);
gs_app_list_add (list, app);
if (gs_app_get_kind (app) == AS_COMPONENT_KIND_ADDON) {
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]