zeitgeist team mailing list archive
-
zeitgeist team
-
Mailing list archive
-
Message #04779
[Branch ~zeitgeist/zeitgeist/bluebird] Rev 391: Merged lp:~mhr3/zeitgeist/fts-extras
Merge authors:
Michal Hruby (mhr3)
Related merge proposals:
https://code.launchpad.net/~mhr3/zeitgeist/fts-extras/+merge/92430
proposed by: Michal Hruby (mhr3)
review: Approve - Siegfried Gevatter (rainct)
------------------------------------------------------------
revno: 391 [merge]
committer: Michal Hruby <michal.mhr@xxxxxxxxx>
branch nick: zeitgeist
timestamp: Fri 2012-02-10 13:30:21 +0100
message:
Merged lp:~mhr3/zeitgeist/fts-extras
modified:
configure.ac
extensions/fts++/Makefile.am
extensions/fts++/fts.cpp
extensions/fts++/fts.h
extensions/fts++/fts.vapi
extensions/fts++/indexer.cpp
extensions/fts++/indexer.h
extensions/fts++/stringutils.cpp
extensions/fts++/stringutils.h
extensions/fts++/test/Makefile.am
extensions/fts++/test/test-indexer.cpp
extensions/fts++/test/test-stringutils.cpp
extensions/fts++/zeitgeist-fts.vala
extensions/fts.vala
src/remote.vala
--
lp:zeitgeist
https://code.launchpad.net/~zeitgeist/zeitgeist/bluebird
Your team Zeitgeist Framework Team is subscribed to branch lp:zeitgeist.
To unsubscribe from this branch go to https://code.launchpad.net/~zeitgeist/zeitgeist/bluebird/+edit-subscription
=== modified file 'configure.ac'
--- configure.ac 2012-02-08 18:54:58 +0000
+++ configure.ac 2012-02-09 15:32:36 +0000
@@ -40,6 +40,30 @@
AC_SUBST(ZEITGEIST_LIBS)
#################################################
+# Dee-ICU check
+#################################################
+DEE_ICU_REQUIRED=1.0.2
+
+AC_ARG_WITH([dee-icu],
+ AS_HELP_STRING([--with-dee-icu[=@<:@no/auto/yes@:>@]],
+ [Build the FTS extension with dee-icu]),
+ [with_dee_icu=$withval],
+ [with_dee_icu="auto"])
+
+if test "x$with_dee_icu" = "xauto" ; then
+ PKG_CHECK_EXISTS([dee-icu-1.0 >= $DEE_ICU_REQUIRED],
+ with_dee_icu="yes",
+ with_dee_icu="no")
+fi
+
+if test "x$with_dee_icu" = "xyes" ; then
+ PKG_CHECK_MODULES(DEE_ICU, dee-icu-1.0 >= $DEE_ICU_REQUIRED)
+ AC_DEFINE(HAVE_DEE_ICU, 1, [Have dee-icu])
+fi
+
+AM_CONDITIONAL(HAVE_DEE_ICU, test "x$with_dee_icu" = "xyes")
+
+#################################################
# DBus service
#################################################
@@ -88,3 +112,16 @@
fi
AC_OUTPUT
+
+cat <<EOF
+
+${PACKAGE}-${VERSION}
+
+ Build Environment
+ Install Prefix: ${prefix}
+
+ Optional dependencies
+ dee-icu: ${with_dee_icu}
+
+EOF
+
=== modified file 'extensions/fts++/Makefile.am'
--- extensions/fts++/Makefile.am 2012-02-08 18:54:58 +0000
+++ extensions/fts++/Makefile.am 2012-02-09 15:32:36 +0000
@@ -76,6 +76,11 @@
-lxapian \
$(NULL)
+if HAVE_DEE_ICU
+AM_CPPFLAGS += $(DEE_ICU_CFLAGS)
+zeitgeist_fts_LDADD += $(DEE_ICU_LIBS)
+endif
+
BUILT_SOURCES = \
zeitgeist-internal.stamp \
zeitgeist-fts_vala.stamp \
=== modified file 'extensions/fts++/fts.cpp'
--- extensions/fts++/fts.cpp 2012-02-09 09:32:33 +0000
+++ extensions/fts++/fts.cpp 2012-02-09 18:34:36 +0000
@@ -84,6 +84,36 @@
return results;
}
+GPtrArray*
+zeitgeist_indexer_search_with_relevancies (ZeitgeistIndexer *indexer,
+ const gchar *search_string,
+ ZeitgeistTimeRange *time_range,
+ GPtrArray *templates,
+ guint offset,
+ guint count,
+ ZeitgeistResultType result_type,
+ gdouble **relevancies,
+ gint *relevancies_size,
+ guint *matches,
+ GError **error)
+{
+ GPtrArray *results;
+ ZeitgeistFTS::Controller *_indexer;
+
+ g_return_val_if_fail (indexer != NULL, NULL);
+ g_return_val_if_fail (search_string != NULL, NULL);
+ g_return_val_if_fail (ZEITGEIST_IS_TIME_RANGE (time_range), NULL);
+ g_return_val_if_fail (error == NULL || *error == NULL, NULL);
+
+ _indexer = (ZeitgeistFTS::Controller*) indexer;
+
+ results = _indexer->indexer->SearchWithRelevancies (
+ search_string, time_range, templates, offset, count, result_type,
+ relevancies, relevancies_size, matches, error);
+
+ return results;
+}
+
void zeitgeist_indexer_index_events (ZeitgeistIndexer *indexer,
GPtrArray *events)
{
=== modified file 'extensions/fts++/fts.h'
--- extensions/fts++/fts.h 2012-02-09 09:32:33 +0000
+++ extensions/fts++/fts.h 2012-02-09 18:34:36 +0000
@@ -43,6 +43,19 @@
guint *matches,
GError **error);
+GPtrArray* zeitgeist_indexer_search_with_relevancies
+ (ZeitgeistIndexer *indexer,
+ const gchar *search_string,
+ ZeitgeistTimeRange *time_range,
+ GPtrArray *templates,
+ guint offset,
+ guint count,
+ ZeitgeistResultType result_type,
+ gdouble **relevancies,
+ gint *relevancies_size,
+ guint *matches,
+ GError **error);
+
void zeitgeist_indexer_index_events (ZeitgeistIndexer *indexer,
GPtrArray *events);
=== modified file 'extensions/fts++/fts.vapi'
--- extensions/fts++/fts.vapi 2012-02-07 17:02:30 +0000
+++ extensions/fts++/fts.vapi 2012-02-09 18:34:36 +0000
@@ -14,6 +14,16 @@
ResultType result_type,
out uint matches) throws GLib.Error;
+ public GLib.GenericArray<Event> search_with_relevancies (
+ string search_string,
+ TimeRange time_range,
+ GLib.GenericArray<Event> templates,
+ uint offset,
+ uint count,
+ ResultType result_type,
+ out double[] relevancies,
+ out uint matches) throws GLib.Error;
+
public void index_events (GLib.GenericArray<Event> events);
public void delete_events (uint[] event_ids);
=== modified file 'extensions/fts++/indexer.cpp'
--- extensions/fts++/indexer.cpp 2012-02-09 09:37:48 +0000
+++ extensions/fts++/indexer.cpp 2012-02-10 11:54:32 +0000
@@ -356,10 +356,40 @@
}
}
+std::string Indexer::PreprocessString (std::string const& input)
+{
+ if (input.empty ()) return input;
+
+ std::string result (StringUtils::RemoveUnderscores (input));
+ // a simple heuristic for the uncamelcaser
+ size_t num_digits = StringUtils::CountDigits (result);
+ if (result.length () > 3 && num_digits < result.length () / 2)
+ {
+ // FIXME: process digits?, atm they stay attached to the text
+ result = StringUtils::UnCamelcase (result);
+ }
+
+ std::string folded (StringUtils::AsciiFold (result));
+ if (!folded.empty ())
+ {
+ result += ' ';
+ result += folded;
+ }
+
+#ifdef DEBUG_PREPROCESSING
+ if (input != result)
+ g_debug ("processed: %s\n-> %s", input.c_str (), result.c_str ());
+#endif
+
+ return result;
+}
+
void Indexer::IndexText (std::string const& text)
{
- // FIXME: ascii folding!
tokenizer->index_text (text, 5);
+ // this is by definition already a human readable display string,
+ // so it shouldn't need removal of underscores and uncamelcase
+ tokenizer->index_text (StringUtils::AsciiFold (text), 5);
}
void Indexer::IndexUri (std::string const& uri, std::string const& origin)
@@ -403,9 +433,10 @@
gchar *pn = g_file_get_parse_name (f);
gchar *basename = g_path_get_basename (pn);
- // FIXME: remove unscores, CamelCase and process digits
- tokenizer->index_text (basename, 5);
- tokenizer->index_text (basename, 5, "N");
+ // remove underscores, CamelCase and process digits
+ std::string processed (PreprocessString (basename));
+ tokenizer->index_text (processed, 5);
+ tokenizer->index_text (processed, 5, "N");
g_free (basename);
// limit the directory indexing to just a few levels
@@ -420,17 +451,17 @@
g_free (dir);
g_free (pn);
- while (path_component.length () > 2 &&
+ while (path_component.length () > 2 &&
weight_index < G_N_ELEMENTS (path_weights))
{
// if this is already home directory we don't want it
- if (path_component.length () == home_dir_path.length () &&
- path_component == home_dir_path) return;
+ if (path_component == home_dir_path) return;
gchar *name = g_path_get_basename (path_component.c_str ());
- // FIXME: un-underscore, uncamelcase, ascii fold
- tokenizer->index_text (name, path_weights[weight_index++]);
+ // un-underscore, uncamelcase, ascii fold
+ processed = PreprocessString (name);
+ tokenizer->index_text (processed, path_weights[weight_index++]);
dir = g_path_get_dirname (path_component.c_str ());
path_component = dir;
@@ -471,9 +502,10 @@
if (g_utf8_validate (unescaped_basename, -1, NULL))
{
- // FIXME: remove unscores, CamelCase and process digits
- tokenizer->index_text (unescaped_basename, 5);
- tokenizer->index_text (unescaped_basename, 5, "N");
+ // remove underscores, CamelCase and process digits
+ std::string processed (PreprocessString (unescaped_basename));
+ tokenizer->index_text (processed, 5);
+ tokenizer->index_text (processed, 5, "N");
}
// and also index hostname (taken from origin field if possible)
@@ -505,6 +537,7 @@
{
// we *really* don't want to index anything with this scheme
}
+ // how about special casing (s)ftp and ssh?
else
{
std::string authority, path, query;
@@ -593,12 +626,11 @@
unsigned name_weight = is_subject ? 5 : 2;
unsigned comment_weight = 2;
- // FIXME: ascii folding somewhere
-
val = g_app_info_get_display_name (ai);
if (val && val[0] != '\0')
{
- std::string display_name (val);
+ std::string display_name (PreprocessString (val));
+
tokenizer->index_text (display_name, name_weight);
tokenizer->index_text (display_name, name_weight, "A");
}
@@ -606,9 +638,14 @@
val = g_desktop_app_info_get_generic_name (dai);
if (val && val[0] != '\0')
{
+ // this shouldn't need uncamelcasing
std::string generic_name (val);
+ std::string generic_name_folded (StringUtils::AsciiFold (generic_name));
+
tokenizer->index_text (generic_name, name_weight);
tokenizer->index_text (generic_name, name_weight, "A");
+ tokenizer->index_text (generic_name_folded, name_weight);
+ tokenizer->index_text (generic_name_folded, name_weight, "A");
}
if (!is_subject) return true;
@@ -642,7 +679,35 @@
return true;
}
-GPtrArray* Indexer::Search (const gchar *search_string,
+std::string Indexer::CompileQueryString (const gchar *search_string,
+ ZeitgeistTimeRange *time_range,
+ GPtrArray *templates)
+{
+ std::string query_string (search_string);
+
+ if (templates && templates->len > 0)
+ {
+ std::string filters (CompileEventFilterQuery (templates));
+ query_string = "(" + query_string + ") AND (" + filters + ")";
+ }
+
+ if (time_range)
+ {
+ gint64 start_time = zeitgeist_time_range_get_start (time_range);
+ gint64 end_time = zeitgeist_time_range_get_end (time_range);
+
+ if (start_time > 0 || end_time < G_MAXINT64)
+ {
+ std::string time_filter (CompileTimeRangeFilterQuery (start_time, end_time));
+ query_string = "(" + query_string + ") AND (" + time_filter + ")";
+ }
+ }
+
+ g_debug ("query: %s", query_string.c_str ());
+ return query_string;
+}
+
+GPtrArray* Indexer::Search (const gchar *search,
ZeitgeistTimeRange *time_range,
GPtrArray *templates,
guint offset,
@@ -654,28 +719,22 @@
GPtrArray *results = NULL;
try
{
- std::string query_string(search_string);
-
- if (templates && templates->len > 0)
- {
- std::string filters (CompileEventFilterQuery (templates));
- query_string = "(" + query_string + ") AND (" + filters + ")";
- }
-
- if (time_range)
- {
- gint64 start_time = zeitgeist_time_range_get_start (time_range);
- gint64 end_time = zeitgeist_time_range_get_end (time_range);
-
- if (start_time > 0 || end_time < G_MAXINT64)
- {
- std::string time_filter (CompileTimeRangeFilterQuery (start_time, end_time));
- query_string = "(" + query_string + ") AND (" + time_filter + ")";
- }
- }
-
- // FIXME: which result types coalesce?
- guint maxhits = count * 3;
+ std::string query_string (CompileQueryString (search, time_range, templates));
+
+ // When sorting by some result types, we need to fetch some extra events
+ // from the Xapian index because the final result set will be coalesced
+ // on some property of the event
+ guint maxhits;
+ if (result_type == 100 ||
+ result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS ||
+ result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_EVENTS)
+ {
+ maxhits = count;
+ }
+ else
+ {
+ maxhits = count * 3;
+ }
if (result_type == 100)
{
@@ -686,7 +745,6 @@
enquire->set_sort_by_value (VALUE_TIMESTAMP, true);
}
- g_debug ("query: %s", query_string.c_str ());
Xapian::Query q(query_parser->parse_query (query_string, QUERY_PARSER_FLAGS));
enquire->set_query (q);
Xapian::MSet hits (enquire->get_mset (offset, maxhits));
@@ -753,7 +811,119 @@
}
catch (Xapian::Error const& e)
{
- g_warning ("Failed to index event: %s", e.get_msg ().c_str ());
+ g_warning ("Failed to search index: %s", e.get_msg ().c_str ());
+ g_set_error_literal (error,
+ ZEITGEIST_ENGINE_ERROR,
+ ZEITGEIST_ENGINE_ERROR_DATABASE_ERROR,
+ e.get_msg ().c_str ());
+ }
+
+ return results;
+}
+
+GPtrArray* Indexer::SearchWithRelevancies (const gchar *search,
+ ZeitgeistTimeRange *time_range,
+ GPtrArray *templates,
+ guint offset,
+ guint count,
+ ZeitgeistResultType result_type,
+ gdouble **relevancies,
+ gint *relevancies_size,
+ guint *matches,
+ GError **error)
+{
+ GPtrArray *results = NULL;
+ try
+ {
+ std::string query_string (CompileQueryString (search, time_range, templates));
+
+ guint maxhits = count;
+
+ if (result_type == 100)
+ {
+ enquire->set_sort_by_relevance ();
+ }
+ else
+ {
+ enquire->set_sort_by_value (VALUE_TIMESTAMP, true);
+ }
+
+ Xapian::Query q(query_parser->parse_query (query_string, QUERY_PARSER_FLAGS));
+ enquire->set_query (q);
+ Xapian::MSet hits (enquire->get_mset (offset, maxhits));
+ Xapian::doccount hitcount = hits.get_matches_estimated ();
+
+ if (result_type == 100)
+ {
+ std::vector<unsigned> event_ids;
+ std::vector<gdouble> relevancy_arr;
+ Xapian::MSetIterator iter, end;
+ for (iter = hits.begin (), end = hits.end (); iter != end; ++iter)
+ {
+ Xapian::Document doc(iter.get_document ());
+ double unserialized =
+ Xapian::sortable_unserialise (doc.get_value (VALUE_EVENT_ID));
+ unsigned event_id = static_cast<unsigned>(unserialized);
+ event_ids.push_back (event_id);
+
+ double rank = iter.get_percent () / 100.;
+ relevancy_arr.push_back (rank);
+ }
+
+ results = zeitgeist_db_reader_get_events (zg_reader,
+ &event_ids[0],
+ event_ids.size (),
+ NULL,
+ error);
+
+ if (results->len != relevancy_arr.size ())
+ {
+ g_warning ("Results don't match relevancies!");
+ g_set_error_literal (error,
+ ZEITGEIST_ENGINE_ERROR,
+ ZEITGEIST_ENGINE_ERROR_DATABASE_ERROR,
+ "Internal database error");
+ return NULL;
+ }
+
+ if (relevancies)
+ {
+ *relevancies = (gdouble*) g_memdup (&relevancy_arr[0],
+ sizeof (gdouble) * results->len);
+ }
+ if (relevancies_size)
+ {
+ *relevancies_size = relevancy_arr.size ();
+ }
+ }
+ else
+ {
+ g_set_error_literal (error,
+ ZEITGEIST_ENGINE_ERROR,
+ ZEITGEIST_ENGINE_ERROR_INVALID_ARGUMENT,
+ "Only RELEVANCY result type is supported");
+ /*
+ * perhaps something like this could be used here?
+ std::map<unsigned, gdouble> relevancy_map;
+ foreach (...)
+ {
+ double rank = iter.get_percent () / 100.;
+ if (rank > relevancy_map[event_id])
+ {
+ relevancy_map[event_id] = rank;
+ }
+ }
+ */
+ }
+
+ if (matches)
+ {
+ *matches = hitcount;
+ }
+ }
+ catch (Xapian::Error const& e)
+ {
+ g_warning ("Failed to search index: %s", e.get_msg ().c_str ());
g_set_error_literal (error,
ZEITGEIST_ENGINE_ERROR,
ZEITGEIST_ENGINE_ERROR_DATABASE_ERROR,
=== modified file 'extensions/fts++/indexer.h'
--- extensions/fts++/indexer.h 2012-02-09 09:37:48 +0000
+++ extensions/fts++/indexer.h 2012-02-10 11:30:52 +0000
@@ -77,7 +77,7 @@
void DeleteEvent (guint32 event_id);
void SetDbMetadata (std::string const& key, std::string const& value);
- GPtrArray* Search (const gchar *search_string,
+ GPtrArray* Search (const gchar *search,
ZeitgeistTimeRange *time_range,
GPtrArray *templates,
guint offset,
@@ -85,11 +85,26 @@
ZeitgeistResultType result_type,
guint *matches,
GError **error);
+ GPtrArray* SearchWithRelevancies (const gchar *search,
+ ZeitgeistTimeRange *time_range,
+ GPtrArray *templates,
+ guint offset,
+ guint count,
+ ZeitgeistResultType result_type,
+ gdouble **relevancies,
+ gint *relevancies_size,
+ guint *matches,
+ GError **error);
private:
std::string ExpandType (std::string const& prefix, const gchar* unparsed_uri);
std::string CompileEventFilterQuery (GPtrArray *templates);
std::string CompileTimeRangeFilterQuery (gint64 start, gint64 end);
+ std::string CompileQueryString (const gchar *search,
+ ZeitgeistTimeRange *time_range,
+ GPtrArray *templates);
+
+ std::string PreprocessString (std::string const& input);
void AddDocFilters (ZeitgeistEvent *event, Xapian::Document &doc);
void IndexText (std::string const& text);
=== modified file 'extensions/fts++/stringutils.cpp'
--- extensions/fts++/stringutils.cpp 2012-02-09 09:32:33 +0000
+++ extensions/fts++/stringutils.cpp 2012-02-10 11:54:32 +0000
@@ -17,9 +17,14 @@
* Authored by Mikkel Kamstrup Erlandsen <mikkel.kamstrup@xxxxxxxxx>
*
*/
+
+#include "stringutils.h"
#include <string>
+#include <algorithm>
-#include "stringutils.h"
+#ifdef HAVE_DEE_ICU
+#include <dee-icu.h>
+#endif
using namespace std;
@@ -123,6 +128,87 @@
}
}
+string RemoveUnderscores (string const &input)
+{
+ string result (input);
+ std::replace (result.begin (), result.end (), '_', ' ');
+
+ return result;
+}
+
+static bool is_digit (char c) { return c >= '0' && c <= '9'; }
+
+size_t CountDigits (string const &input)
+{
+ return std::count_if (input.begin (), input.end (), is_digit);
+}
+
+static GRegex *camelcase_matcher = NULL;
+
+static gboolean
+matcher_cb (const GMatchInfo *match_info, GString *result, gpointer user_data)
+{
+ gint start_pos;
+ g_match_info_fetch_pos (match_info, 0, &start_pos, NULL);
+ if (start_pos != 0) g_string_append_c (result, ' ');
+ gchar *word = g_match_info_fetch (match_info, 0);
+ g_string_append (result, word);
+ g_free (word);
+
+ return FALSE;
+}
+
+string UnCamelcase (string const &input)
+{
+ if (camelcase_matcher == NULL)
+ {
+ camelcase_matcher = g_regex_new ("(?<=^|[[:lower:]])[[:upper:]]+[^[:upper:]]+", G_REGEX_OPTIMIZE, (GRegexMatchFlags) 0, NULL);
+ if (camelcase_matcher == NULL) g_critical ("Unable to create matcher!");
+ }
+
+ gchar *result = g_regex_replace_eval (camelcase_matcher, input.c_str (),
+ input.length (), 0,
+ (GRegexMatchFlags) 0,
+ matcher_cb, NULL, NULL);
+
+ string ret (result);
+ g_free (result);
+ return ret;
+}
+
+#ifdef HAVE_DEE_ICU
+static DeeICUTermFilter *icu_filter = NULL;
+
+/**
+ * Use ascii folding filter on the input text and return folded version
+ * of the original string.
+ *
+ * Note that if the folded version is exactly the same as the original,
+ * an empty string will be returned.
+ */
+string AsciiFold (string const& input)
+{
+ if (icu_filter == NULL)
+ {
+ icu_filter = dee_icu_term_filter_new_ascii_folder ();
+ if (icu_filter == NULL) return "";
+ }
+
+ // FIXME: check first if the input contains any non-ascii chars?
+
+ gchar *folded = dee_icu_term_filter_apply (icu_filter, input.c_str ());
+ string result (folded);
+ g_free (folded);
+
+ return result == input ? "" : result;
+}
+#else
+string AsciiFold (string const& input)
+{
+ return "";
+}
+#endif
+
} /* namespace StringUtils */
} /* namespace ZeitgeistFTS */
=== modified file 'extensions/fts++/stringutils.h'
--- extensions/fts++/stringutils.h 2012-02-09 09:32:33 +0000
+++ extensions/fts++/stringutils.h 2012-02-10 10:19:52 +0000
@@ -37,6 +37,14 @@
std::string &path,
std::string &basename);
+std::string RemoveUnderscores (std::string const &input);
+
+size_t CountDigits (std::string const &input);
+
+std::string UnCamelcase (std::string const &input);
+
+std::string AsciiFold (std::string const& input);
+
} /* namespace StringUtils */
} /* namespace ZeitgeistFTS */
=== modified file 'extensions/fts++/test/Makefile.am'
--- extensions/fts++/test/Makefile.am 2012-02-08 18:54:58 +0000
+++ extensions/fts++/test/Makefile.am 2012-02-09 15:32:36 +0000
@@ -25,3 +25,8 @@
-lxapian \
$(NULL)
+if HAVE_DEE_ICU
+AM_CPPFLAGS += $(DEE_ICU_CFLAGS)
+test_fts_LDADD += $(DEE_ICU_LIBS)
+endif
+
=== modified file 'extensions/fts++/test/test-indexer.cpp'
--- extensions/fts++/test/test-indexer.cpp 2012-02-09 09:32:33 +0000
+++ extensions/fts++/test/test-indexer.cpp 2012-02-10 12:07:27 +0000
@@ -145,6 +145,26 @@
return event;
}
+static ZeitgeistEvent* create_test_event5 (void)
+{
+ ZeitgeistEvent *event = zeitgeist_event_new ();
+ ZeitgeistSubject *subject = zeitgeist_subject_new ();
+
+ zeitgeist_subject_set_interpretation (subject, ZEITGEIST_NFO_SOURCE_CODE);
+ zeitgeist_subject_set_manifestation (subject, ZEITGEIST_NFO_FILE_DATA_OBJECT);
+ zeitgeist_subject_set_uri (subject, "file:///home/username/projects/GLibSignalImplementation.cpp");
+ zeitgeist_subject_set_text (subject, "Because c++ is awesome");
+ zeitgeist_subject_set_mimetype (subject, "text/x-c++src");
+
+ zeitgeist_event_set_interpretation (event, ZEITGEIST_ZG_CREATE_EVENT);
+ zeitgeist_event_set_manifestation (event, ZEITGEIST_ZG_USER_ACTIVITY);
+ zeitgeist_event_set_actor (event, "application://gedit.desktop");
+ zeitgeist_event_add_subject (event, subject);
+
+ g_object_unref (subject);
+ return event;
+}
+
// Steals the event, ref it if you want to keep it
static guint
index_event (Fixture *fix, ZeitgeistEvent *event)
@@ -426,6 +446,71 @@
}
static void
+test_simple_underscores (Fixture *fix, gconstpointer data)
+{
+ guint matches;
+ guint event_id;
+ ZeitgeistEvent* event;
+ ZeitgeistSubject *subject;
+
+ // add test events to DBs
+ index_event (fix, create_test_event1 ());
+ index_event (fix, create_test_event2 ());
+ index_event (fix, create_test_event3 ());
+ event_id = index_event (fix, create_test_event4 ());
+
+ GPtrArray *results =
+ zeitgeist_indexer_search (fix->indexer,
+ "fabulo*",
+ zeitgeist_time_range_new_anytime (),
+ g_ptr_array_new (),
+ 0,
+ 10,
+ ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS,
+ &matches,
+ NULL);
+
+ g_assert_cmpuint (matches, >, 0);
+ g_assert_cmpuint (results->len, ==, 1);
+
+ event = (ZeitgeistEvent*) results->pdata[0];
+ g_assert_cmpuint (zeitgeist_event_get_id (event), ==, event_id);
+}
+
+static void
+test_simple_camelcase (Fixture *fix, gconstpointer data)
+{
+ guint matches;
+ guint event_id;
+ ZeitgeistEvent* event;
+ ZeitgeistSubject *subject;
+
+ // add test events to DBs
+ index_event (fix, create_test_event1 ());
+ index_event (fix, create_test_event2 ());
+ index_event (fix, create_test_event3 ());
+ index_event (fix, create_test_event4 ());
+ event_id = index_event (fix, create_test_event5 ());
+
+ GPtrArray *results =
+ zeitgeist_indexer_search (fix->indexer,
+ "signal",
+ zeitgeist_time_range_new_anytime (),
+ g_ptr_array_new (),
+ 0,
+ 10,
+ ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS,
+ &matches,
+ NULL);
+
+ g_assert_cmpuint (matches, >, 0);
+ g_assert_cmpuint (results->len, ==, 1);
+
+ event = (ZeitgeistEvent*) results->pdata[0];
+ g_assert_cmpuint (zeitgeist_event_get_id (event), ==, event_id);
+}
+
+static void
test_simple_cjk (Fixture *fix, gconstpointer data)
{
guint matches;
@@ -517,6 +602,10 @@
setup, test_simple_noexpand, teardown);
g_test_add ("/Zeitgeist/FTS/Indexer/SimpleNoexpandValid", Fixture, 0,
setup, test_simple_noexpand_valid, teardown);
+ g_test_add ("/Zeitgeist/FTS/Indexer/SimpleUnderscores", Fixture, 0,
+ setup, test_simple_underscores, teardown);
+ g_test_add ("/Zeitgeist/FTS/Indexer/SimpleCamelcase", Fixture, 0,
+ setup, test_simple_camelcase, teardown);
g_test_add ("/Zeitgeist/FTS/Indexer/URLUnescape", Fixture, 0,
setup, test_simple_url_unescape, teardown);
g_test_add ("/Zeitgeist/FTS/Indexer/IDNSupport", Fixture, 0,
=== modified file 'extensions/fts++/test/test-stringutils.cpp'
--- extensions/fts++/test/test-stringutils.cpp 2012-02-09 09:32:33 +0000
+++ extensions/fts++/test/test-stringutils.cpp 2012-02-10 11:54:32 +0000
@@ -163,6 +163,91 @@
g_assert_cmpstr ("type=A", ==, query.c_str ());
}
+static void
+test_ascii_fold (Fixture *fix, gconstpointer data)
+{
+ std::string folded;
+
+ folded = StringUtils::AsciiFold ("");
+ g_assert_cmpstr ("", ==, folded.c_str ());
+
+ // if the original matches the folded version, AsciiFold returns ""
+ folded = StringUtils::AsciiFold ("a");
+ g_assert_cmpstr ("", ==, folded.c_str ());
+
+ folded = StringUtils::AsciiFold ("abcdef");
+ g_assert_cmpstr ("", ==, folded.c_str ());
+
+ folded = StringUtils::AsciiFold ("å");
+ g_assert_cmpstr ("a", ==, folded.c_str ());
+
+ folded = StringUtils::AsciiFold ("åå");
+ g_assert_cmpstr ("aa", ==, folded.c_str ());
+
+ folded = StringUtils::AsciiFold ("aåaåa");
+ g_assert_cmpstr ("aaaaa", ==, folded.c_str ());
+}
+
+static void
+test_underscores (Fixture *fix, gconstpointer data)
+{
+ g_assert_cmpstr ("", ==, StringUtils::RemoveUnderscores ("").c_str ());
+
+ g_assert_cmpstr (" ", ==, StringUtils::RemoveUnderscores ("_").c_str ());
+
+ g_assert_cmpstr (" ", ==, StringUtils::RemoveUnderscores ("___").c_str ());
+
+ g_assert_cmpstr ("abcd", ==, StringUtils::RemoveUnderscores ("abcd").c_str ());
+
+ g_assert_cmpstr (" abcd ", ==, StringUtils::RemoveUnderscores ("_abcd_").c_str ());
+
+ g_assert_cmpstr ("a b c d", ==, StringUtils::RemoveUnderscores ("a_b_c_d").c_str ());
+}
+
+static void
+test_uncamelcase (Fixture *fix, gconstpointer data)
+{
+ g_assert_cmpstr ("", ==, StringUtils::UnCamelcase ("").c_str ());
+
+ g_assert_cmpstr ("abcd", ==, StringUtils::UnCamelcase ("abcd").c_str ());
+
+ g_assert_cmpstr ("Abcd", ==, StringUtils::UnCamelcase ("Abcd").c_str ());
+
+ g_assert_cmpstr ("ABCD", ==, StringUtils::UnCamelcase ("ABCD").c_str ());
+
+ g_assert_cmpstr ("ABcd", ==, StringUtils::UnCamelcase ("ABcd").c_str ());
+
+ g_assert_cmpstr ("Abcd Ef", ==, StringUtils::UnCamelcase ("AbcdEf").c_str ());
+
+ g_assert_cmpstr ("Text Editor", ==, StringUtils::UnCamelcase ("Text Editor").c_str ());
+
+ g_assert_cmpstr ("py Karaoke", ==, StringUtils::UnCamelcase ("pyKaraoke").c_str ());
+
+ g_assert_cmpstr ("Zeitgeist Project", ==, StringUtils::UnCamelcase ("ZeitgeistProject").c_str ());
+
+ g_assert_cmpstr ("Very Nice Camel Case Text", ==, StringUtils::UnCamelcase ("VeryNiceCamelCaseText").c_str ());
+
+ g_assert_cmpstr ("Ňeedš Ťo Wórk Óń Útf Čhářacters As WelL", ==,
+ StringUtils::UnCamelcase ("ŇeedšŤoWórkÓńÚtfČhářactersAsWelL").c_str ());
+}
+
+static void
+test_count_digits (Fixture *fix, gconstpointer data)
+{
+ g_assert_cmpuint (0, ==, StringUtils::CountDigits (""));
+
+ g_assert_cmpuint (0, ==, StringUtils::CountDigits ("abcdefghijklmnopqrstuvwxyz"));
+
+ g_assert_cmpuint (10, ==, StringUtils::CountDigits ("0123456789"));
+
+ g_assert_cmpuint (1, ==, StringUtils::CountDigits ("abc3"));
+
+ g_assert_cmpuint (3, ==, StringUtils::CountDigits ("::123__poa//weee"));
+
+ g_assert_cmpuint (5, ==, StringUtils::CountDigits ("PCN30129.JPG"));
+
+}
+
G_BEGIN_DECLS
void test_stringutils_create_suite (void)
@@ -173,6 +258,16 @@
setup, test_mangle, teardown);
g_test_add ("/Zeitgeist/FTS/StringUtils/SplitUri", Fixture, 0,
setup, test_split, teardown);
+ g_test_add ("/Zeitgeist/FTS/StringUtils/RemoveUnderscores", Fixture, 0,
+ setup, test_underscores, teardown);
+ g_test_add ("/Zeitgeist/FTS/StringUtils/UnCamelcase", Fixture, 0,
+ setup, test_uncamelcase, teardown);
+ g_test_add ("/Zeitgeist/FTS/StringUtils/CountDigits", Fixture, 0,
+ setup, test_count_digits, teardown);
+#ifdef HAVE_DEE_ICU
+ g_test_add ("/Zeitgeist/FTS/StringUtils/AsciiFold", Fixture, 0,
+ setup, test_ascii_fold, teardown);
+#endif
}
G_END_DECLS
=== modified file 'extensions/fts++/zeitgeist-fts.vala'
--- extensions/fts++/zeitgeist-fts.vala 2012-02-09 09:32:33 +0000
+++ extensions/fts++/zeitgeist-fts.vala 2012-02-09 18:34:36 +0000
@@ -132,6 +132,23 @@
events = Events.to_variant (results);
}
+ public async void search_with_relevancies (
+ string query_string, Variant time_range,
+ Variant filter_templates,
+ uint offset, uint count, uint result_type,
+ out Variant events, out double[] relevancies,
+ out uint matches)
+ throws Error
+ {
+ var tr = new TimeRange.from_variant (time_range);
+ var templates = Events.from_variant (filter_templates);
+ var results = instance.indexer.search_with_relevancies (
+ query_string, tr, templates, offset, count,
+ (ResultType) result_type, out relevancies, out matches);
+
+ events = Events.to_variant (results);
+ }
+
private static void name_acquired_callback (DBusConnection conn)
{
name_acquired = true;
=== modified file 'extensions/fts.vala'
--- extensions/fts.vala 2012-02-07 12:47:44 +0000
+++ extensions/fts.vala 2012-02-10 09:35:31 +0000
@@ -31,6 +31,14 @@
uint offset, uint count, uint result_type,
[DBus (signature = "a(asaasay)")] out Variant events,
out uint matches) throws Error;
+ public abstract async void search_with_relevancies (
+ string query_string,
+ [DBus (signature = "(xx)")] Variant time_range,
+ [DBus (signature = "a(asaasay)")] Variant filter_templates,
+ uint offset, uint count, uint result_type,
+ [DBus (signature = "a(asaasay)")] out Variant events,
+ out double[] relevancies,
+ out uint matches) throws Error;
}
/* Because of a Vala bug we have to define the proxy interface outside of
@@ -55,6 +63,7 @@
private const string INDEXER_NAME = "org.gnome.zeitgeist.SimpleIndexer";
private RemoteSimpleIndexer siin;
+ private bool siin_connection_failed = false;
private uint registration_id;
private MonitorManager? notifier;
@@ -67,6 +76,8 @@
{
if (Utils.using_in_memory_database ()) return;
+ // FIXME: check dbus and see if fts is installed?
+
// installing a monitor from the daemon will ensure that we don't
// miss any notifications that would be emitted in between
// zeitgeist start and fts daemon start
@@ -109,23 +120,40 @@
try
{
siin = conn.get_proxy.end<RemoteSimpleIndexer> (res);
+ siin_connection_failed = false;
}
catch (IOError err)
{
+ siin_connection_failed = true;
warning ("%s", err.message);
}
}
- public async void search (string query_string, Variant time_range,
- Variant filter_templates, uint offset, uint count, uint result_type,
- out Variant events, out uint matches) throws Error
+ public async void wait_for_proxy () throws Error
{
+ int i = 0;
+ while (this.siin == null && i < 6 && !siin_connection_failed)
+ {
+ Timeout.add_full (Priority.DEFAULT_IDLE, 250,
+ wait_for_proxy.callback);
+ i++;
+ yield;
+ }
+
if (siin == null || !(siin is DBusProxy))
{
// FIXME: queue until we have the proxy
throw new EngineError.DATABASE_ERROR (
"Not connected to SimpleIndexer");
}
+ }
+
+ public async void search (string query_string, Variant time_range,
+ Variant filter_templates, uint offset, uint count, uint result_type,
+ out Variant events, out uint matches) throws Error
+ {
+ if (siin == null) yield wait_for_proxy ();
+
var timer = new Timer ();
yield siin.search (query_string, time_range, filter_templates,
offset, count, result_type,
@@ -134,6 +162,24 @@
(uint) events.n_children (), matches, timer.elapsed ());
}
+ public async void search_with_relevancies (
+ string query_string, Variant time_range,
+ Variant filter_templates, uint offset, uint count, uint result_type,
+ out Variant events, out double[] relevancies, out uint matches)
+ throws Error
+ {
+ if (siin == null) yield wait_for_proxy ();
+
+ var timer = new Timer ();
+ yield siin.search_with_relevancies (
+ query_string, time_range, filter_templates,
+ offset, count, result_type,
+ out events, out relevancies, out matches);
+
+ debug ("Got %u[/%u] results from indexer (in %f seconds)",
+ (uint) events.n_children (), matches, timer.elapsed ());
+ }
+
}
[ModuleInit]
=== modified file 'src/remote.vala'
--- src/remote.vala 2012-02-05 14:52:13 +0000
+++ src/remote.vala 2012-02-09 18:34:36 +0000
@@ -121,6 +121,13 @@
uint offset, uint count, uint result_type,
[DBus (signature = "a(asaasay)")] out Variant events,
out uint matches) throws Error;
+ public abstract async void search_with_relevancies (
+ string query_string,
+ [DBus (signature = "(xx)")] Variant time_range,
+ [DBus (signature = "a(asaasay)")] Variant filter_templates,
+ uint offset, uint count, uint result_type,
+ [DBus (signature = "a(asaasay)")] out Variant events,
+ out double[] relevancies, out uint matches) throws Error;
}
/* FIXME: Remove this! Only here because of a bug in Vala (see ext-fts) */