launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #08750
[Merge] lp:~wgrant/launchpad/package-search-timeouts into lp:launchpad
William Grant has proposed merging lp:~wgrant/launchpad/package-search-timeouts into lp:launchpad.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
Related bugs:
Bug #739051 in Launchpad itself: "Product:+index timeouts"
https://bugs.launchpad.net/launchpad/+bug/739051
For more details, see:
https://code.launchpad.net/~wgrant/launchpad/package-search-timeouts/+merge/109997
This branch reworks Distribution.searchSourcePackageCaches to be sub-200ms on DF. It relies on the indices in lp:~wgrant/launchpad/dspc-trgm-indices for optimal speed.
The name ILIKE '%foo%' changes to a LIKE, since DSPC.name is always lowercase -- this is indexable by the new GIN trigram index. SPPH.archive is now constrained, to be more correct and to let it use the new index. SPPH.sourcepackagename now also joins directly onto DSPC.sourcepackagename, rather than indirecting through SPR and SPN.
--
https://code.launchpad.net/~wgrant/launchpad/package-search-timeouts/+merge/109997
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~wgrant/launchpad/package-search-timeouts into lp:launchpad.
=== modified file 'lib/lp/registry/doc/distribution.txt'
--- lib/lp/registry/doc/distribution.txt 2012-04-10 14:01:17 +0000
+++ lib/lp/registry/doc/distribution.txt 2012-06-13 08:23:27 +0000
@@ -182,7 +182,7 @@
The distribution also allows you to look for source packages that match
a certain string through the magic of fti. For instance:
- >>> packages = ubuntu.searchSourcePackageCaches("mozilla")
+ >>> packages = ubuntu.searchSourcePackageCaches(u"mozilla")
>>> for distro_source_package_cache, source_name, rank in packages:
... print "%-17s rank:%s" % (
... distro_source_package_cache.name,
@@ -192,10 +192,10 @@
The search also matches on exact package names which fti doesn't like,
and even on substrings:
- >>> packages = ubuntu.searchSourcePackageCaches("linux-source-2.6.15")
+ >>> packages = ubuntu.searchSourcePackageCaches(u"linux-source-2.6.15")
>>> print packages.count()
1
- >>> packages = ubuntu.searchSourcePackageCaches('a')
+ >>> packages = ubuntu.searchSourcePackageCaches(u'a')
>>> for distro_source_package_cache, source_name, rank in packages:
... print "%s: %-17s rank:%s" % (
... distro_source_package_cache.__class__.__name__,
@@ -210,7 +210,7 @@
The searchSourcePackages() method just returns a decorated version
of the results from searchSourcePackageCaches():
- >>> packages = ubuntu.searchSourcePackages('a')
+ >>> packages = ubuntu.searchSourcePackages(u'a')
>>> for dsp in packages:
... print "%s: %s" % (dsp.__class__.__name__, dsp.name)
DistributionSourcePackage: alsa-utils
@@ -224,13 +224,13 @@
the results based on whether the source package has an entry
in the Packaging table linking it to an upstream project.
- >>> packages = ubuntu.searchSourcePackages('a', has_packaging=True)
+ >>> packages = ubuntu.searchSourcePackages(u'a', has_packaging=True)
>>> for dsp in packages:
... print "%s: %s" % (dsp.__class__.__name__, dsp.name)
DistributionSourcePackage: alsa-utils
DistributionSourcePackage: mozilla-firefox
DistributionSourcePackage: netapplet
- >>> packages = ubuntu.searchSourcePackages('a', has_packaging=False)
+ >>> packages = ubuntu.searchSourcePackages(u'a', has_packaging=False)
>>> for dsp in packages:
... print "%s: %s" % (dsp.__class__.__name__, dsp.name)
DistributionSourcePackage: commercialpackage
@@ -242,7 +242,7 @@
SourcePackagePublishingHistory table for the given distroseries.
>>> packages = ubuntu.searchSourcePackages(
- ... 'a', publishing_distroseries=ubuntu.currentseries)
+ ... u'a', publishing_distroseries=ubuntu.currentseries)
>>> for dsp in packages:
... print "%s: %s" % (dsp.__class__.__name__, dsp.name)
DistributionSourcePackage: alsa-utils
=== modified file 'lib/lp/registry/model/distribution.py'
--- lib/lp/registry/model/distribution.py 2012-06-08 02:57:12 +0000
+++ lib/lp/registry/model/distribution.py 2012-06-13 08:23:27 +0000
@@ -24,12 +24,15 @@
)
from sqlobject.sqlbuilder import SQLConstant
from storm.info import ClassAlias
-from storm.locals import (
+from storm.expr import (
And,
Desc,
+ Exists,
Join,
Max,
+ Not,
Or,
+ Select,
SQL,
)
from storm.store import Store
@@ -1086,6 +1089,7 @@
def searchSourcePackageCaches(
self, text, has_packaging=None, publishing_distroseries=None):
"""See `IDistribution`."""
+ from lp.registry.model.packaging import Packaging
from lp.soyuz.model.distributionsourcepackagecache import (
DistributionSourcePackageCache,
)
@@ -1097,58 +1101,58 @@
find_spec = (
DistributionSourcePackageCache,
SourcePackageName,
- SQL('rank(fti, ftq(%s)) AS rank' % sqlvalues(text)),
+ SQL('rank(fti, ftq(?)) AS rank', params=(text,)),
)
origin = [
DistributionSourcePackageCache,
Join(
SourcePackageName,
DistributionSourcePackageCache.sourcepackagename ==
- SourcePackageName.id,
+ SourcePackageName.id),
+ ]
+
+ # quote_like SQL-escapes the string in addition to LIKE-escaping
+ # it, so we can't use params=. So we need to double-escape the %
+ # on either side of the string: once to survive the formatting
+ # here, and once to survive Storm's formatting during
+ # compilation. Storm should really %-escape literal SQL strings,
+ # but it doesn't.
+ conditions = [
+ DistributionSourcePackageCache.distribution == self,
+ DistributionSourcePackageCache.archiveID.is_in(
+ self.all_distro_archive_ids),
+ Or(
+ SQL("DistributionSourcePackageCache.fti @@ ftq(?)",
+ params=(text,)),
+ SQL("DistributionSourcePackageCache.name "
+ "LIKE '%%%%' || %s || '%%%%'" % quote_like(text.lower())),
),
]
- publishing_condition = ''
+ if has_packaging is not None:
+ packaging_query = Exists(Select(
+ 1, tables=[Packaging],
+ where=(Packaging.sourcepackagenameID == SourcePackageName.id)))
+ if has_packaging is False:
+ packaging_query = Not(packaging_query)
+ conditions.append(packaging_query)
+
if publishing_distroseries is not None:
- origin += [
- Join(SourcePackageRelease,
- SourcePackageRelease.sourcepackagename ==
- SourcePackageName.id),
- Join(SourcePackagePublishingHistory,
- SourcePackagePublishingHistory.sourcepackagerelease ==
- SourcePackageRelease.id),
- ]
- publishing_condition = (
- "AND SourcePackagePublishingHistory.distroseries = %d"
- % publishing_distroseries.id)
-
- packaging_query = """
- SELECT 1
- FROM Packaging
- WHERE Packaging.sourcepackagename = SourcePackageName.id
- """
- has_packaging_condition = ''
- if has_packaging is True:
- has_packaging_condition = 'AND EXISTS (%s)' % packaging_query
- elif has_packaging is False:
- has_packaging_condition = 'AND NOT EXISTS (%s)' % packaging_query
-
- # Note: When attempting to convert the query below into straight
- # Storm expressions, a 'tuple index out-of-range' error was always
- # raised.
- condition = """
- DistributionSourcePackageCache.distribution = %s AND
- DistributionSourcePackageCache.archive IN %s AND
- (DistributionSourcePackageCache.fti @@ ftq(%s) OR
- DistributionSourcePackageCache.name ILIKE '%%' || %s || '%%')
- %s
- %s
- """ % (quote(self), quote(self.all_distro_archive_ids),
- quote(text), quote_like(text), has_packaging_condition,
- publishing_condition)
+ origin.append(
+ Join(
+ SourcePackagePublishingHistory,
+ SourcePackagePublishingHistory.sourcepackagenameID ==
+ DistributionSourcePackageCache.sourcepackagenameID))
+ conditions.extend([
+ SourcePackagePublishingHistory.distroseries ==
+ publishing_distroseries,
+ SourcePackagePublishingHistory.archiveID.is_in(
+ self.all_distro_archive_ids),
+ ])
+
dsp_caches_with_ranks = store.using(*origin).find(
- find_spec, condition).order_by(
- 'rank DESC, DistributionSourcePackageCache.name')
+ find_spec, *conditions).order_by(
+ Desc(SQL('rank')), DistributionSourcePackageCache.name)
dsp_caches_with_ranks.config(distinct=True)
return dsp_caches_with_ranks
=== modified file 'lib/lp/soyuz/doc/package-cache-script.txt'
--- lib/lp/soyuz/doc/package-cache-script.txt 2012-05-03 20:26:29 +0000
+++ lib/lp/soyuz/doc/package-cache-script.txt 2012-06-13 08:23:27 +0000
@@ -19,16 +19,16 @@
'cdrkit' source and binary are published but it's not present in
cache:
- >>> ubuntu.searchSourcePackages('cdrkit').count()
+ >>> ubuntu.searchSourcePackages(u'cdrkit').count()
0
- >>> warty.searchPackages('cdrkit').count()
+ >>> warty.searchPackages(u'cdrkit').count()
0
'foobar' source and binary are removed but still present in cache:
- >>> ubuntu.searchSourcePackages('foobar').count()
+ >>> ubuntu.searchSourcePackages(u'foobar').count()
1
- >>> warty.searchPackages('foobar').count()
+ >>> warty.searchPackages(u'foobar').count()
1
Normal operation produces INFO level information about the
@@ -62,14 +62,14 @@
Now, search results are up to date:
- >>> ubuntu.searchSourcePackages('cdrkit').count()
+ >>> ubuntu.searchSourcePackages(u'cdrkit').count()
1
- >>> warty.searchPackages('cdrkit').count()
+ >>> warty.searchPackages(u'cdrkit').count()
1
- >>> ubuntu.searchSourcePackages('foobar').count()
+ >>> ubuntu.searchSourcePackages(u'foobar').count()
0
- >>> warty.searchPackages('foobar').count()
+ >>> warty.searchPackages(u'foobar').count()
0
Explicitly mark the database as dirty so that it is cleaned (see bug 994158).
=== modified file 'lib/lp/soyuz/doc/package-cache.txt'
--- lib/lp/soyuz/doc/package-cache.txt 2012-01-24 12:36:15 +0000
+++ lib/lp/soyuz/doc/package-cache.txt 2012-06-13 08:23:27 +0000
@@ -81,13 +81,13 @@
Building these caches we can reach good performance on full and partial
term searching.
- >>> ubuntu.searchSourcePackages('mozilla').count()
- 1
-
- >>> ubuntu.searchSourcePackages('moz').count()
- 1
-
- >>> ubuntu.searchSourcePackages('biscoito').count()
+ >>> ubuntu.searchSourcePackages(u'mozilla').count()
+ 1
+
+ >>> ubuntu.searchSourcePackages(u'moz').count()
+ 1
+
+ >>> ubuntu.searchSourcePackages(u'biscoito').count()
0
The cache update procedure is done by cronscripts/update-pkgcache.py,
@@ -109,7 +109,7 @@
>>> foobar_pub.status.name
'DELETED'
- >>> ubuntu.searchSourcePackages('foobar').count()
+ >>> ubuntu.searchSourcePackages(u'foobar').count()
1
>>> foobar_cache = DistributionSourcePackageCache.selectOneBy(
@@ -131,7 +131,7 @@
>>> import transaction
>>> transaction.commit()
- >>> ubuntu.searchSourcePackages('foobar').count()
+ >>> ubuntu.searchSourcePackages(u'foobar').count()
0
>>> foobar_cache = DistributionSourcePackageCache.selectOneBy(
@@ -149,7 +149,7 @@
>>> cdrkit_pub.status.name
'PUBLISHED'
- >>> ubuntu.searchSourcePackages('cdrkit').count()
+ >>> ubuntu.searchSourcePackages(u'cdrkit').count()
0
>>> cdrkit_cache = DistributionSourcePackageCache.selectOneBy(
@@ -176,7 +176,7 @@
Now we see that the 'cdrkit' source is part of the caches and can be
reached via searches:
- >>> ubuntu.searchSourcePackages('cdrkit').count()
+ >>> ubuntu.searchSourcePackages(u'cdrkit').count()
1
>>> cdrkit_cache = DistributionSourcePackageCache.selectOneBy(
Follow ups