launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #00196
[Merge] lp:~lifeless/launchpad/malone into lp:launchpad/devel
Robert Collins has proposed merging lp:~lifeless/launchpad/malone into lp:launchpad/devel.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
Related bugs:
#607776 +filebug-show-similar FTI query timeouts
https://bugs.launchpad.net/bugs/607776
Do less work searching, so searching is faster. Faster searching -> less load -> less timeouts. We all win. Winning is good. Goodness is good.
--
https://code.launchpad.net/~lifeless/launchpad/malone/+merge/30507
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~lifeless/launchpad/malone into lp:launchpad/devel.
=== modified file 'lib/canonical/database/nl_search.py'
--- lib/canonical/database/nl_search.py 2009-06-25 05:30:52 +0000
+++ lib/canonical/database/nl_search.py 2010-07-21 10:52:48 +0000
@@ -36,7 +36,58 @@
def nl_phrase_search(phrase, table, constraints='',
- extra_constraints_tables=None):
+ extra_constraints_tables=None,
+ fast_enabled=True):
+ """Return the tsearch2 query that should be use to do a phrase search.
+
+ The precise heuristics applied by this function will vary as we tune
+ the system.
+
+ It is the interface by which a user query should be turned into a backend
+ search language query.
+
+ Caveats: The model class must define a 'fti' column which is then used used
+ for full text searching.
+
+ :param phrase: A search phrase.
+ :param table: This should be the SQLBase class representing the base type.
+ :param constraints: Additional SQL clause that limits the rows to a subset
+ of the table.
+ :param extra_constraints_tables: A list of additional table names that are
+ needed by the constraints clause.
+ :param fast_enabled: If true use a fast, but less precise, code path. When
+ feature flags are available this will be converted to a feature flag.
+ :return: A tsearch2 query string.
+ """
+ terms = nl_term_candidates(phrase)
+ if len(terms) == 0:
+ return ''
+ if fast_enabled:
+ return _nl_phrase_search(terms, table, constraints,
+ extra_constraints_tables)
+ else:
+ return _slow_nl_phrase_search(terms, table, constraints,
+ extra_constraints_tables)
+
+
+def _nl_phrase_search(terms, table, constraints, extra_constraints_tables):
+ """Perform a very simple pruning of the phrase, letting fti do ranking.
+
+ See nl_phrase_search for the contract of this function.
+ """
+ bad_words = set(['ubuntu', 'launchpad'])
+ terms = set(terms)
+ filtered_terms = []
+ for term in terms:
+ if term.lower() in bad_words:
+ continue
+ filtered_terms.append(term)
+ # sorted for testing convenience.
+ return '|'.join(sorted(filtered_terms))
+
+
+def _slow_nl_phrase_search(terms, table, constraints,
+ extra_constraints_tables):
"""Return the tsearch2 query that should be use to do a phrase search.
This function implement an algorithm similar to the one used by MySQL
@@ -56,7 +107,7 @@
closer in the text at the top of the list, while still returning rows that
use only some of the terms.
- :phrase: A search phrase.
+ :terms: Some candidate search terms.
:table: This should be the SQLBase class representing the base type.
@@ -66,14 +117,12 @@
:extra_constraints_tables: A list of additional table names that are
needed by the constraints clause.
- Caveat: The SQLBase class must define a 'fti' column .
- This is the column that is used for full text searching.
+ Caveat: The model class must define a 'fti' column which is then used used
+ for full text searching.
"""
total = table.select(
constraints, clauseTables=extra_constraints_tables).count()
- term_candidates = nl_term_candidates(phrase)
- if len(term_candidates) == 0:
- return ''
+ term_candidates = terms
if total < 5:
return '|'.join(term_candidates)
=== modified file 'lib/canonical/launchpad/doc/textsearching.txt'
--- lib/canonical/launchpad/doc/textsearching.txt 2009-07-23 17:49:31 +0000
+++ lib/canonical/launchpad/doc/textsearching.txt 2010-07-21 10:52:48 +0000
@@ -588,7 +588,12 @@
To get the actual tsearch2 query that should be run, you will use the
nl_phrase_search() function. This one takes two mandatory parameters and
-two optional ones. You pass in the search phrase and a SQLObject class.
+two optional ones. You pass in the search phrase and a database model class.
+
+The original nl_phrase_search has proved slow, so there are now two
+implementations in the core.
+
+First we describe the slow implementation.
The select method of that class will be use to count the number of rows
that is matched by each term. Term matching 50% or more of the total
@@ -608,7 +613,8 @@
So firefox will be removed from the final query:
- >>> nl_phrase_search('system is slow when running firefox', Question)
+ >>> nl_phrase_search('system is slow when running firefox', Question,
+ ... fast_enabled=False)
u'system|slow|run'
>>> nl_term_candidates('how do I do this?')
@@ -616,6 +622,12 @@
>>> nl_phrase_search('how do I do this?', Question)
''
+The fast code path only removes ubuntu and launchpad today:
+
+ >>> nl_phrase_search('system is slow when running firefox on ubuntu',
+ ... Question)
+ u'firefox|run|slow|system'
+
==== Using other constraints ====
@@ -640,13 +652,13 @@
>>> nl_phrase_search(
... 'firefox gets very slow on flickr', Question,
... "Question.product = %s AND Product.active = 't'" % firefox_product.id,
- ... ['Product'])
+ ... ['Product'], fast_enabled=False)
u'slow|flickr'
When the query only has stop words or common words in it, the returned
query will be the empty string:
- >>> nl_phrase_search('firefox will not do it', Question)
+ >>> nl_phrase_search('ubuntu will not do it', Question)
''
When there are no candidate rows, only stemming and stop words removal
@@ -656,7 +668,7 @@
0
>>> nl_phrase_search('firefox is very slow on flickr', Question,
... 'product = -1')
- u'firefox|slow|flickr'
+ u'firefox|flickr|slow'
==== No keywords filtering with few rows ====
=== modified file 'lib/lp/bugs/model/bugtask.py'
--- lib/lp/bugs/model/bugtask.py 2010-07-08 13:10:41 +0000
+++ lib/lp/bugs/model/bugtask.py 2010-07-21 10:52:48 +0000
@@ -29,6 +29,7 @@
from storm.expr import And, Alias, AutoTables, In, Join, LeftJoin, Or, SQL
from storm.sqlobject import SQLObjectResultSet
+from storm.store import EmptyResultSet
from storm.zope.interfaces import IResultSet, ISQLObjectResultSet
import pytz
@@ -1462,6 +1463,8 @@
def findSimilar(self, user, summary, product=None, distribution=None,
sourcepackagename=None):
"""See `IBugTaskSet`."""
+ if not summary:
+ return EmptyResultSet()
# Avoid circular imports.
from lp.bugs.model.bug import Bug
search_params = BugTaskSearchParams(user)
@@ -1482,9 +1485,6 @@
else:
raise AssertionError('Need either a product or distribution.')
- if not summary:
- return BugTask.select('1 = 2')
-
search_params.fast_searchtext = nl_phrase_search(
summary, Bug, ' AND '.join(constraint_clauses), ['BugTask'])
return self.search(search_params)
@@ -2069,7 +2069,6 @@
', '.join(tables), ' AND '.join(clauses))
return clause
-
def search(self, params, *args):
"""See `IBugTaskSet`."""
store_selector = getUtility(IStoreSelector)
=== modified file 'lib/lp/soyuz/model/binarypackagebuild.py'
--- lib/lp/soyuz/model/binarypackagebuild.py 2010-07-18 00:56:16 +0000
+++ lib/lp/soyuz/model/binarypackagebuild.py 2010-07-21 10:52:48 +0000
@@ -12,6 +12,7 @@
import operator
from storm.locals import Int, Reference
+from storm.store import EmptyResultSet
from zope.interface import implements
from zope.component import getUtility
@@ -882,10 +883,7 @@
"""See `IBinaryPackageBuildSet`."""
# If not distroarchseries was found return empty list
if not arch_ids:
- # XXX cprov 2006-09-08: returning and empty SelectResult to make
- # the callsites happy as bjorn suggested. However it would be
- # much clearer if we have something like SQLBase.empty() for this
- return BinaryPackageBuild.select("2=1")
+ return EmptyResultSet()
clauseTables = []