← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~lifeless/launchpad/malone into lp:launchpad/devel

 

Robert Collins has proposed merging lp:~lifeless/launchpad/malone into lp:launchpad/devel.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  #607776 +filebug-show-similar FTI query timeouts
  https://bugs.launchpad.net/bugs/607776


Do less work searching, so searching is faster. Faster searching -> less load -> less timeouts. We all win. Winning is good. Goodness is good.
-- 
https://code.launchpad.net/~lifeless/launchpad/malone/+merge/30507
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~lifeless/launchpad/malone into lp:launchpad/devel.
=== modified file 'lib/canonical/database/nl_search.py'
--- lib/canonical/database/nl_search.py	2009-06-25 05:30:52 +0000
+++ lib/canonical/database/nl_search.py	2010-07-21 10:52:48 +0000
@@ -36,7 +36,58 @@
 
 
 def nl_phrase_search(phrase, table, constraints='',
-                     extra_constraints_tables=None):
+                     extra_constraints_tables=None,
+                     fast_enabled=True):
+    """Return the tsearch2 query that should be use to do a phrase search.
+
+    The precise heuristics applied by this function will vary as we tune
+    the system.
+
+    It is the interface by which a user query should be turned into a backend
+    search language query.
+
+    Caveats: The model class must define a 'fti' column which is then used used
+    for full text searching.
+
+    :param phrase: A search phrase.
+    :param table: This should be the SQLBase class representing the base type.
+    :param constraints: Additional SQL clause that limits the rows to a subset
+        of the table.
+    :param extra_constraints_tables: A list of additional table names that are
+        needed by the constraints clause.
+    :param fast_enabled: If true use a fast, but less precise, code path. When
+        feature flags are available this will be converted to a feature flag.
+    :return: A tsearch2 query string.
+    """
+    terms = nl_term_candidates(phrase)
+    if len(terms) == 0:
+        return ''
+    if fast_enabled:
+        return _nl_phrase_search(terms, table, constraints,
+            extra_constraints_tables)
+    else:
+        return _slow_nl_phrase_search(terms, table, constraints,
+            extra_constraints_tables)
+
+
+def _nl_phrase_search(terms, table, constraints, extra_constraints_tables):
+    """Perform a very simple pruning of the phrase, letting fti do ranking.
+
+    See nl_phrase_search for the contract of this function.
+    """
+    bad_words = set(['ubuntu', 'launchpad'])
+    terms = set(terms)
+    filtered_terms = []
+    for term in terms:
+        if term.lower() in bad_words:
+            continue
+        filtered_terms.append(term)
+    # sorted for testing convenience.
+    return '|'.join(sorted(filtered_terms))
+
+
+def _slow_nl_phrase_search(terms, table, constraints,
+    extra_constraints_tables):
     """Return the tsearch2 query that should be use to do a phrase search.
 
     This function implement an algorithm similar to the one used by MySQL
@@ -56,7 +107,7 @@
     closer in the text at the top of the list, while still returning rows that
     use only some of the terms.
 
-    :phrase: A search phrase.
+    :terms: Some candidate search terms.
 
     :table: This should be the SQLBase class representing the base type.
 
@@ -66,14 +117,12 @@
     :extra_constraints_tables: A list of additional table names that are
     needed by the constraints clause.
 
-    Caveat: The SQLBase class must define a 'fti' column .
-    This is the column that is used for full text searching.
+    Caveat: The model class must define a 'fti' column which is then used used
+    for full text searching.
     """
     total = table.select(
         constraints, clauseTables=extra_constraints_tables).count()
-    term_candidates = nl_term_candidates(phrase)
-    if len(term_candidates) == 0:
-        return ''
+    term_candidates = terms
     if total < 5:
         return '|'.join(term_candidates)
 

=== modified file 'lib/canonical/launchpad/doc/textsearching.txt'
--- lib/canonical/launchpad/doc/textsearching.txt	2009-07-23 17:49:31 +0000
+++ lib/canonical/launchpad/doc/textsearching.txt	2010-07-21 10:52:48 +0000
@@ -588,7 +588,12 @@
 
 To get the actual tsearch2 query that should be run, you will use the
 nl_phrase_search() function. This one takes two mandatory parameters and
-two optional ones. You pass in the search phrase and a SQLObject class.
+two optional ones. You pass in the search phrase and a database model class.
+
+The original nl_phrase_search has proved slow, so there are now two
+implementations in the core.
+
+First we describe the slow implementation.
 
 The select method of that class will be use to count the number of rows
 that is matched by each term. Term matching 50% or more of the total
@@ -608,7 +613,8 @@
 
 So firefox will be removed from the final query:
 
-    >>> nl_phrase_search('system is slow when running firefox', Question)
+    >>> nl_phrase_search('system is slow when running firefox', Question,
+    ...     fast_enabled=False)
     u'system|slow|run'
 
     >>> nl_term_candidates('how do I do this?')
@@ -616,6 +622,12 @@
     >>> nl_phrase_search('how do I do this?', Question)
     ''
 
+The fast code path only removes ubuntu and launchpad today:
+
+    >>> nl_phrase_search('system is slow when running firefox on ubuntu',
+    ...     Question)
+    u'firefox|run|slow|system'
+
 
 ==== Using other constraints ====
 
@@ -640,13 +652,13 @@
     >>> nl_phrase_search(
     ...     'firefox gets very slow on flickr', Question,
     ...     "Question.product = %s AND Product.active = 't'" % firefox_product.id,
-    ...     ['Product'])
+    ...     ['Product'], fast_enabled=False)
     u'slow|flickr'
 
 When the query only has stop words or common words in it, the returned
 query will be the empty string:
 
-    >>> nl_phrase_search('firefox will not do it', Question)
+    >>> nl_phrase_search('ubuntu will not do it', Question)
     ''
 
 When there are no candidate rows, only stemming and stop words removal
@@ -656,7 +668,7 @@
     0
     >>> nl_phrase_search('firefox is very slow on flickr', Question,
     ...                  'product = -1')
-    u'firefox|slow|flickr'
+    u'firefox|flickr|slow'
 
 
 ==== No keywords filtering with few rows ====

=== modified file 'lib/lp/bugs/model/bugtask.py'
--- lib/lp/bugs/model/bugtask.py	2010-07-08 13:10:41 +0000
+++ lib/lp/bugs/model/bugtask.py	2010-07-21 10:52:48 +0000
@@ -29,6 +29,7 @@
 
 from storm.expr import And, Alias, AutoTables, In, Join, LeftJoin, Or, SQL
 from storm.sqlobject import SQLObjectResultSet
+from storm.store import EmptyResultSet
 from storm.zope.interfaces import IResultSet, ISQLObjectResultSet
 
 import pytz
@@ -1462,6 +1463,8 @@
     def findSimilar(self, user, summary, product=None, distribution=None,
                     sourcepackagename=None):
         """See `IBugTaskSet`."""
+        if not summary:
+            return EmptyResultSet()
         # Avoid circular imports.
         from lp.bugs.model.bug import Bug
         search_params = BugTaskSearchParams(user)
@@ -1482,9 +1485,6 @@
         else:
             raise AssertionError('Need either a product or distribution.')
 
-        if not summary:
-            return BugTask.select('1 = 2')
-
         search_params.fast_searchtext = nl_phrase_search(
             summary, Bug, ' AND '.join(constraint_clauses), ['BugTask'])
         return self.search(search_params)
@@ -2069,7 +2069,6 @@
             ', '.join(tables), ' AND '.join(clauses))
         return clause
 
-
     def search(self, params, *args):
         """See `IBugTaskSet`."""
         store_selector = getUtility(IStoreSelector)

=== modified file 'lib/lp/soyuz/model/binarypackagebuild.py'
--- lib/lp/soyuz/model/binarypackagebuild.py	2010-07-18 00:56:16 +0000
+++ lib/lp/soyuz/model/binarypackagebuild.py	2010-07-21 10:52:48 +0000
@@ -12,6 +12,7 @@
 import operator
 
 from storm.locals import Int, Reference
+from storm.store import EmptyResultSet
 
 from zope.interface import implements
 from zope.component import getUtility
@@ -882,10 +883,7 @@
         """See `IBinaryPackageBuildSet`."""
         # If not distroarchseries was found return empty list
         if not arch_ids:
-            # XXX cprov 2006-09-08: returning and empty SelectResult to make
-            # the callsites happy as bjorn suggested. However it would be
-            # much clearer if we have something like SQLBase.empty() for this
-            return BinaryPackageBuild.select("2=1")
+            return EmptyResultSet()
 
         clauseTables = []