← Back to team overview

zeitgeist team mailing list archive

[Merge] lp:~mhr3/zeitgeist-extensions/scheme-detection into lp:zeitgeist-extensions

 

Michal Hruby has proposed merging lp:~mhr3/zeitgeist-extensions/scheme-detection into lp:zeitgeist-extensions.

Requested reviews:
  Zeitgeist Extensions (zeitgeist-extensions)

For more details, see:
https://code.launchpad.net/~mhr3/zeitgeist-extensions/scheme-detection/+merge/77481


-- 
https://code.launchpad.net/~mhr3/zeitgeist-extensions/scheme-detection/+merge/77481
Your team Zeitgeist Extensions is requested to review the proposed merge of lp:~mhr3/zeitgeist-extensions/scheme-detection into lp:zeitgeist-extensions.
=== modified file 'fts/fts.py'
--- fts/fts.py	2011-09-07 08:42:40 +0000
+++ fts/fts.py	2011-09-29 08:47:25 +0000
@@ -62,6 +62,7 @@
 log = logging.getLogger("zeitgeist.fts")
 
 INDEX_FILE = os.path.join(constants.DATA_PATH, "fts.index")
+INDEX_VERSION = "1"
 INDEX_LOCK = threading.Lock()
 FTS_DBUS_OBJECT_PATH = "/org/gnome/zeitgeist/index/activity"
 FTS_DBUS_INTERFACE = "org.gnome.zeitgeist.Index"
@@ -320,6 +321,9 @@
 			# force of a reindex
 			log.info("Index built without CJK support. Upgrading index")
 			self._queue.put(Reindex(self._engine))
+		elif self._index.get_metadata("fts_index_version") != INDEX_VERSION:
+			log.info("Index must be upgraded. Doing full rebuild")
+			self._queue.put(Reindex(self._engine))
 		elif self._index.get_doccount() == 0:
 			# If the index is empty we trigger a rebuild
 			# We must delay reindexing until after the engine is done setting up
@@ -472,7 +476,8 @@
 		self._index = xapian.WritableDatabase(INDEX_FILE, xapian.DB_CREATE_OR_OVERWRITE)
 		self._query_parser.set_database (self._index)
 		self._enquire = xapian.Enquire(self._index)
-		
+	
+		self._index.set_metadata("fts_index_version", INDEX_VERSION)
 		# Register that this index was built with CJK enabled
 		if "XAPIAN_CJK_NGRAM" in os.environ :
 			self._index.set_metadata("cjk_ngram", "1")
@@ -607,7 +612,7 @@
 		# usually web URIs, are indexed in another way because there may
 		# be domain name etc. in there we want to rank differently
 		scheme, host, path = self._split_uri (url_unescape (uri))
-		if scheme == "file://" or not scheme:
+		if scheme == "file" or not scheme:
 			path, name = os.path.split(path)
 			self._tokenizer.index_text(name, 5)
 			self._tokenizer.index_text(name, 5, "N")
@@ -617,9 +622,9 @@
 			while path and name:
 				weight = weight / 1.5
 				path, name = os.path.split(path)
-				self._tokenizer.index_text(name, weight)
+				self._tokenizer.index_text(name, int(weight))
 			
-		elif scheme == "mailto:":
+		elif scheme == "mailto":
 			tokens = host.split("@")
 			name = tokens[0]
 			self._tokenizer.index_text(name, 6)