← Back to team overview

zeitgeist team mailing list archive

[Merge] lp:~rainct/zeitgeist/collapse-uri into lp:zeitgeist

 

Siegfried Gevatter has proposed merging lp:~rainct/zeitgeist/collapse-uri into lp:zeitgeist.

Requested reviews:
  Zeitgeist Framework Team (zeitgeist)

For more details, see:
https://code.launchpad.net/~rainct/zeitgeist/collapse-uri/+merge/95994
-- 
https://code.launchpad.net/~rainct/zeitgeist/collapse-uri/+merge/95994
Your team Zeitgeist Framework Team is requested to review the proposed merge of lp:~rainct/zeitgeist/collapse-uri into lp:zeitgeist.
=== modified file 'extensions/fts++/indexer.cpp'
--- extensions/fts++/indexer.cpp	2012-02-14 16:56:04 +0000
+++ extensions/fts++/indexer.cpp	2012-03-05 20:44:22 +0000
@@ -23,6 +23,7 @@
 #include <xapian.h>
 #include <queue>
 #include <vector>
+#include <cassert>
 
 #include <gio/gio.h>
 #include <gio/gdesktopappinfo.h>
@@ -42,6 +43,7 @@
 
 const Xapian::valueno VALUE_EVENT_ID = 0;
 const Xapian::valueno VALUE_TIMESTAMP = 1;
+const Xapian::valueno VALUE_URI_HASH = 2;
 
 #define QUERY_PARSER_FLAGS \
   Xapian::QueryParser::FLAG_PHRASE | Xapian::QueryParser::FLAG_BOOLEAN | \
@@ -100,6 +102,11 @@
     this->query_parser->set_database (*this->db);
 
     this->enquire = new Xapian::Enquire (*this->db);
+    
+    assert (g_checksum_type_get_length (G_CHECKSUM_MD5) == 16);
+    this->checksum = g_checksum_new (G_CHECKSUM_MD5);
+    if (!this->checksum)
+        g_critical ("GChecksum initialization failed.");
 
   }
   catch (const Xapian::Error &xp_error)
@@ -727,7 +734,11 @@
     guint maxhits;
     if (result_type == 100 ||
         result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS ||
-        result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_EVENTS)
+        result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_EVENTS ||
+        result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_SUBJECTS ||
+        result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_SUBJECTS ||
+        result_type == ZEITGEIST_RESULT_TYPE_MOST_POPULAR_SUBJECTS ||
+        result_type == ZEITGEIST_RESULT_TYPE_LEAST_POPULAR_SUBJECTS)
     {
       maxhits = count;
     }
@@ -745,6 +756,14 @@
       enquire->set_sort_by_value (VALUE_TIMESTAMP, true);
     }
 
+    if (result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_SUBJECTS ||
+        result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_SUBJECTS ||
+        result_type == ZEITGEIST_RESULT_TYPE_MOST_POPULAR_SUBJECTS ||
+        result_type == ZEITGEIST_RESULT_TYPE_LEAST_POPULAR_SUBJECTS)
+    {
+        enquire->set_collapse_key (VALUE_URI_HASH);
+    }
+
     Xapian::Query q(query_parser->parse_query (query_string, QUERY_PARSER_FLAGS));
     enquire->set_query (q);
     Xapian::MSet hits (enquire->get_mset (offset, maxhits));
@@ -988,6 +1007,19 @@
         return; // ignore this event completely...
       }
 
+      // We need the subject URI so we can use Xapian's collapse key feature
+      // for *_SUBJECTS grouping. However, to save space, we store only a hash
+      // of it. A better option would be to use the URI's id, but that would
+      // require an SQL query, which would be subject to races.
+      // FIXME(?): This doesn't work for events with multiple subjects.
+      g_checksum_update (checksum, (guchar *) uri.c_str (), -1);
+      guint8 uri_hash[17];
+      gsize hash_size = 16;
+      g_checksum_get_digest (checksum, uri_hash, &hash_size);
+      assert (hash_size == 16);
+      doc.add_value (VALUE_URI_HASH, std::string((char *) uri_hash, 16));
+      g_checksum_reset (checksum);
+
       val = zeitgeist_subject_get_text (subject);
       if (val && val[0] != '\0')
       {

=== modified file 'extensions/fts++/indexer.h'
--- extensions/fts++/indexer.h	2012-02-14 16:56:04 +0000
+++ extensions/fts++/indexer.h	2012-03-05 20:44:22 +0000
@@ -21,6 +21,7 @@
 #define _ZGFTS_INDEXER_H_
 
 #include <glib-object.h>
+#include <glib/gchecksum.h>
 #include <gio/gio.h>
 #include <xapian.h>
 
@@ -42,6 +43,7 @@
     , query_parser (NULL)
     , enquire (NULL)
     , tokenizer (NULL)
+    , checksum (NULL)
     , clear_failed_id (0)
   {
     const gchar *home_dir = g_get_home_dir ();
@@ -54,6 +56,7 @@
     if (enquire) delete enquire;
     if (query_parser) delete query_parser;
     if (db) delete db;
+    if (checksum) { g_checksum_free (checksum); checksum = NULL; }
 
     for (AppInfoMap::iterator it = app_info_cache.begin ();
          it != app_info_cache.end (); ++it)
@@ -120,6 +123,7 @@
   Xapian::TermGenerator    *tokenizer;
   AppInfoMap                app_info_cache;
   ApplicationSet            failed_lookups;
+  GChecksum                 *checksum;
 
   guint                     clear_failed_id;
   std::string               home_dir_path;


Follow ups