launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #02593
[Merge] lp:~wgrant/launchpad/bug-718004-rewrite-targetnamecache into lp:launchpad
William Grant has proposed merging lp:~wgrant/launchpad/bug-718004-rewrite-targetnamecache into lp:launchpad.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
Related bugs:
#198778 IBugTaskSet.dangerousGetAllTasks() shouldn't exist
https://bugs.launchpad.net/bugs/198778
#718004 update-bugtask-targetnamecaches is horrifyingly inefficient
https://bugs.launchpad.net/bugs/718004
For more details, see:
https://code.launchpad.net/~wgrant/launchpad/bug-718004-rewrite-targetnamecache/+merge/49612
This branch rewrites update-bugtask-targetnamecaches to be around 1000 times faster on the production dataset.
Previously it would retrieve every bugtask ever and call a method on each. In a tunableloop. Without precaching. It took around 18 hours to run on production.
This new version instead selects all distinct targets and their cached names, calculates each target's proper name, and does a mass-update on any outdated cached names. That alone takes it down to 8 minutes on mawson. Additionally precaching all target objects takes it down to 1 minute, a 100000% improvement over production.
Tests have been changed to cope with the new script's debug output, and I've dropped BugTaskSet.dangerousGetAllTasks which is now unused.
--
https://code.launchpad.net/~wgrant/launchpad/bug-718004-rewrite-targetnamecache/+merge/49612
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~wgrant/launchpad/bug-718004-rewrite-targetnamecache into lp:launchpad.
=== modified file 'lib/lp/bugs/doc/bugtask.txt'
--- lib/lp/bugs/doc/bugtask.txt 2011-02-10 01:18:39 +0000
+++ lib/lp/bugs/doc/bugtask.txt 2011-02-14 11:59:57 +0000
@@ -1113,8 +1113,12 @@
>>> print err
INFO Creating lockfile: /var/lock/launchpad-launchpad-targetnamecacheupdater.lock
INFO Updating targetname cache of bugtasks.
- INFO Updating 1 BugTasks (starting id: 2).
- INFO Updating ...BugTasks...
+ INFO Calculating targets.
+ INFO Will check ... targets.
+ ...
+ INFO Updating (u'Mozilla Thunderbird',) to 'Mozilla Thunderbird NG'.
+ ...
+ INFO Updated 1 target names.
INFO Finished updating targetname cache of bugtasks.
>>> process.returncode
@@ -1192,8 +1196,11 @@
>>> updater = BugTaskTargetNameCacheUpdater(transaction, logger)
>>> updater.run()
INFO Updating targetname cache of bugtasks.
- INFO Updating 1 BugTasks (starting id: 2).
- INFO Updating ...BugTasks...
+ INFO Calculating targets.
+ ...
+ INFO Updating (u'Mozilla Thunderbird NG',) to 'Mozilla Thunderbird'.
+ ...
+ INFO Updated 1 target names.
INFO Finished updating targetname cache of bugtasks.
>>> flush_database_caches()
=== modified file 'lib/lp/bugs/interfaces/bugtask.py'
--- lib/lp/bugs/interfaces/bugtask.py 2011-02-08 05:54:20 +0000
+++ lib/lp/bugs/interfaces/bugtask.py 2011-02-14 11:59:57 +0000
@@ -1512,19 +1512,6 @@
If the col_name is unrecognized, a KeyError is raised.
"""
- # XXX kiko 2006-03-23:
- # get rid of this kludge when we have proper security for scripts.
- def dangerousGetAllTasks():
- """DO NOT USE THIS METHOD UNLESS YOU KNOW WHAT YOU ARE DOING
-
- Returns ALL BugTasks. YES, THAT INCLUDES PRIVATE ONES. Do not
- use this method. DO NOT USE IT. I REPEAT: DO NOT USE IT.
-
- This method exists solely for the purpose of scripts that need
- to do gardening over all bug tasks; the current example is
- update-bugtask-targetnamecaches.
- """
-
def getBugCountsForPackages(user, packages):
"""Return open bug counts for the list of packages.
=== modified file 'lib/lp/bugs/model/bugtask.py'
--- lib/lp/bugs/model/bugtask.py 2011-02-14 10:24:56 +0000
+++ lib/lp/bugs/model/bugtask.py 2011-02-14 11:59:57 +0000
@@ -15,6 +15,7 @@
'BugTaskSet',
'NullBugTask',
'bugtask_sort_key',
+ 'determine_target',
'get_bug_privacy_filter',
'get_related_bugtasks_search_params',
'search_value_to_where_condition',
@@ -146,7 +147,6 @@
from lp.registry.interfaces.milestone import IProjectGroupMilestone
from lp.registry.interfaces.person import (
IPerson,
- IPersonSet,
validate_person,
validate_public_person,
)
@@ -268,6 +268,29 @@
return search_params
+def determine_target(product, productseries, distribution, distroseries,
+ sourcepackagename):
+ """Returns the IBugTarget defined by the given arguments."""
+ if product:
+ return product
+ elif productseries:
+ return productseries
+ elif distribution:
+ if sourcepackagename:
+ return distribution.getSourcePackage(
+ sourcepackagename)
+ else:
+ return distribution
+ elif distroseries:
+ if sourcepackagename:
+ return distroseries.getSourcePackage(
+ sourcepackagename)
+ else:
+ return distroseries
+ else:
+ raise AssertionError("Unable to determine bugtask target.")
+
+
class BugTaskDelta:
"""See `IBugTaskDelta`."""
@@ -313,24 +336,9 @@
# We explicitly reference attributes here (rather than, say,
# IDistroBugTask.providedBy(self)), because we can't assume this
# task has yet been marked with the correct interface.
- if self.product:
- return self.product
- elif self.productseries:
- return self.productseries
- elif self.distribution:
- if self.sourcepackagename:
- return self.distribution.getSourcePackage(
- self.sourcepackagename)
- else:
- return self.distribution
- elif self.distroseries:
- if self.sourcepackagename:
- return self.distroseries.getSourcePackage(
- self.sourcepackagename)
- else:
- return self.distroseries
- else:
- raise AssertionError("Unable to determine bugtask target.")
+ return determine_target(
+ self.product, self.productseries, self.distribution,
+ self.distroseries, self.sourcepackagename)
@property
def related_tasks(self):
@@ -2924,10 +2932,6 @@
return tuple(orderby_arg)
- def dangerousGetAllTasks(self):
- """DO NOT USE THIS METHOD. For details, see `IBugTaskSet`"""
- return BugTask.select(orderBy='id')
-
def getBugCountsForPackages(self, user, packages):
"""See `IBugTaskSet`."""
distributions = sorted(
=== modified file 'lib/lp/bugs/scripts/bugtasktargetnamecaches.py'
--- lib/lp/bugs/scripts/bugtasktargetnamecaches.py 2010-08-20 20:31:18 +0000
+++ lib/lp/bugs/scripts/bugtasktargetnamecaches.py 2011-02-14 11:59:57 +0000
@@ -6,12 +6,33 @@
__metaclass__ = type
__all__ = ['BugTaskTargetNameCacheUpdater']
-from zope.component import getUtility
+from collections import defaultdict
+
from zope.interface import implements
+from canonical.launchpad.interfaces.lpstorm import (
+ IMasterStore,
+ ISlaveStore,
+ )
from canonical.launchpad.interfaces.looptuner import ITunableLoop
from canonical.launchpad.utilities.looptuner import LoopTuner
-from lp.bugs.interfaces.bugtask import IBugTaskSet
+from lp.bugs.model.bugtask import (
+ BugTask,
+ determine_target,
+ )
+from lp.registry.model.distribution import Distribution
+from lp.registry.model.distroseries import DistroSeries
+from lp.registry.model.product import Product
+from lp.registry.model.productseries import ProductSeries
+from lp.registry.model.sourcepackagename import SourcePackageName
+
+
+target_classes = (
+ Product, ProductSeries, Distribution, DistroSeries, SourcePackageName)
+target_columns = (
+ BugTask.productID, BugTask.productseriesID, BugTask.distributionID,
+ BugTask.distroseriesID, BugTask.sourcepackagenameID,
+ BugTask.targetnamecache)
class BugTaskTargetNameCachesTunableLoop(object):
@@ -19,23 +40,38 @@
implements(ITunableLoop)
- total_updated = 0
-
def __init__(self, transaction, logger, offset=0):
self.transaction = transaction
self.logger = logger
self.offset = offset
self.total_updated = 0
+ self.logger.info("Calculating targets.")
+ self.transaction.begin()
+ self.candidates = self.determineCandidates()
+ self.transaction.abort()
+ self.logger.info("Will check %i targets." % len(self.candidates))
+
+ def determineCandidates(self):
+ """Find all distinct BugTask targets with their cached names.
+
+ Returns a list of (target, set_of_cached_names) pairs, where target is
+ (Product ID, ProductSeries ID, Distribution ID, DistroSeries ID,
+ SourcePackageName ID).
+ """
+ store = ISlaveStore(BugTask)
+ candidate_set = store.find(target_columns).config(distinct=True)
+ candidates = defaultdict(set)
+ for p, ps, d, ds, spn, cache in candidate_set:
+ candidates[(p, ps, d, ds, spn)].add(cache)
+ return list(candidates.iteritems())
+
def isDone(self):
"""See `ITunableLoop`."""
- # When the main loop has no more BugTasks to process it sets
- # offset to None. Until then, it always has a numerical
- # value.
- return self.offset is None
+ return self.offset >= len(self.candidates)
def __call__(self, chunk_size):
- """Retrieve a batch of BugTasks and update their targetname caches.
+ """Take a batch of targets and update their BugTasks' name caches.
See `ITunableLoop`.
"""
@@ -49,30 +85,43 @@
start = self.offset
end = self.offset + chunk_size
+ chunk = self.candidates[start:end]
+
self.transaction.begin()
- # XXX: kiko 2006-03-23:
- # We use a special API here, which is kinda klunky, but which
- # allows us to return all bug tasks (even private ones); this should
- # eventually be changed to a more elaborate permissions scheme,
- # pending the infrastructure to do so. See bug #198778.
- bugtasks = list(
- getUtility(IBugTaskSet).dangerousGetAllTasks()[start:end])
-
- self.offset = None
- if bugtasks:
- starting_id = bugtasks[0].id
- self.logger.info("Updating %i BugTasks (starting id: %i)." %
- (len(bugtasks), starting_id))
-
- for bugtask in bugtasks:
- # We set the starting point of the next batch to the BugTask
- # id after the one we're looking at now. If there aren't any
- # bugtasks this loop will run for 0 iterations and start_id
- # will remain set to None.
- start += 1
- self.offset = start
- bugtask.updateTargetNameCache()
- self.total_updated += 1
+ store = IMasterStore(BugTask)
+
+ # Rotate the target rows into lists of object IDs to retrieve.
+ # This way we can issue just one query of each type for each batch.
+ ids_to_cache = zip(*(target for (target, names) in chunk))
+ for index, cls in enumerate(target_classes):
+ ids = (id for id in ids_to_cache[index] if id is not None)
+ list(store.find(cls, cls.id.is_in(ids)))
+
+ for target_bits, cached_names in chunk:
+ self.offset += 1
+ # Resolve the IDs to objects, and get the actual IBugTarget.
+ # If the ID is None, don't even try to get an object.
+ target_objects = (
+ (store.get(cls, id) if id is not None else None)
+ for cls, id in zip(target_classes, target_bits))
+ target = determine_target(*target_objects)
+ new_name = target.bugtargetdisplayname
+ cached_names.discard(new_name)
+ # If there are any outdated names cached, update them all in
+ # a single query.
+ if len(cached_names) > 0:
+ self.logger.info(
+ "Updating %r to '%s'." % (tuple(cached_names), new_name))
+ self.total_updated += len(cached_names)
+ conditions = (
+ col == id for col, id in zip(target_columns, target_bits))
+ to_update = store.find(
+ BugTask,
+ BugTask.targetnamecache.is_in(cached_names),
+ *conditions)
+ to_update.set(targetnamecache=new_name)
+
+ self.logger.info("Checked %i targets." % len(chunk))
self.transaction.commit()
@@ -96,7 +145,5 @@
loop_tuner = LoopTuner(loop, 2)
loop_tuner.run()
- self.logger.info("Updated %i bugtask targetname caches." %
- loop.total_updated)
+ self.logger.info("Updated %i target names." % loop.total_updated)
self.logger.info("Finished updating targetname cache of bugtasks.")
-