launchpad-reviewers team mailing list archive: Message #05476
[Merge] lp:~allenap/launchpad/bugnomination-timeout-bug-874250 into lp:launchpad
Gavin Panella has proposed merging lp:~allenap/launchpad/bugnomination-timeout-bug-874250 into lp:launchpad.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
Related bugs:
Bug #874250 in Launchpad itself: "BugNomination:+editstatus timeout for bugs with many tasks"
https://bugs.launchpad.net/launchpad/+bug/874250
For more details, see:
https://code.launchpad.net/~allenap/launchpad/bugnomination-timeout-bug-874250/+merge/81836
This works on one part of the performance issue outlined in the bug:
fetching the DistributionSourcePackageInDatabase records that back
ephemeral DistributionSourcePackage objects.
At present they're pulled out on demand, one at a time, and if a new
DSP object is created they are queried for again.
Normally that would be okay, because Storm could answer from its
cache. However, Storm's object cache is keyed by primary key, and
these lookups are by distribution and source package name, so Storm
can't answer from its cache and each lookup hits the database.
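For example, a condensed sketch (hypothetical names; store.get() by
primary key can be answered from Storm's object cache, while find() on
other columns always issues a query):

  # Answerable from Storm's cache: lookup by primary key.
  dsp = store.get(DistributionSourcePackageInDatabase, dsp_id)
  # Not answerable from the cache: lookup by other columns.
  dsp = store.find(
      DistributionSourcePackageInDatabase,
      DistributionSourcePackageInDatabase.distribution == distribution,
      DistributionSourcePackageInDatabase.sourcepackagename ==
          sourcepackagename).one()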
This branch introduces an LRU cache for (dist_id, spn_id) -> dsp_id.
The cache is maintained per thread, is limited to 1000 entries, and is
allowed to persist across transaction boundaries. There is code
in DistributionSourcePackageInDatabase.get() to ensure that stale or
bogus entries in the cache are detected.
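In outline, a sketch of the cache itself (assuming bzrlib's LRUCache
API as used in the diff below, where the second constructor argument
is the entry count to trim back to on cleanup; dist_id, spn_id and
dsp_id stand in for the relevant database ids):

  from threading import local
  from bzrlib.lru_cache import LRUCache

  class ThreadLocalLRUCache(LRUCache, local):
      """A per-thread LRU cache."""

  # At most 1000 entries, trimmed back to 700 on cleanup. Each thread
  # sees its own independent cache state.
  cache = ThreadLocalLRUCache(1000, 700)
  cache[dist_id, spn_id] = dsp_id  # record a mapping
  dsp_id = cache.get((dist_id, spn_id))  # None on a miss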
This should save 264 queries in OOPS-2113CG81, or ~0.7 seconds.
--
https://code.launchpad.net/~allenap/launchpad/bugnomination-timeout-bug-874250/+merge/81836
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~allenap/launchpad/bugnomination-timeout-bug-874250 into lp:launchpad.
=== modified file 'lib/lp/registry/model/distributionsourcepackage.py'
--- lib/lp/registry/model/distributionsourcepackage.py 2011-10-05 18:02:45 +0000
+++ lib/lp/registry/model/distributionsourcepackage.py 2011-11-10 11:54:25 +0000
@@ -14,7 +14,9 @@
import itertools
import operator
+from threading import local
+from bzrlib.lru_cache import LRUCache
from lazr.restful.utils import smartquote
from sqlobject.sqlbuilder import SQLConstant
from storm.expr import (
@@ -542,23 +544,14 @@
@classmethod
def _get(cls, distribution, sourcepackagename):
- return Store.of(distribution).find(
- DistributionSourcePackageInDatabase,
- DistributionSourcePackageInDatabase.sourcepackagename ==
- sourcepackagename,
- DistributionSourcePackageInDatabase.distribution ==
- distribution).one()
+ return DistributionSourcePackageInDatabase.get(
+ distribution, sourcepackagename)
@classmethod
def _new(cls, distribution, sourcepackagename,
is_upstream_link_allowed=False):
- dsp = DistributionSourcePackageInDatabase()
- dsp.distribution = distribution
- dsp.sourcepackagename = sourcepackagename
- dsp.is_upstream_link_allowed = is_upstream_link_allowed
- Store.of(distribution).add(dsp)
- Store.of(distribution).flush()
- return dsp
+ return DistributionSourcePackageInDatabase.new(
+ distribution, sourcepackagename, is_upstream_link_allowed)
@classmethod
def ensure(cls, spph=None, sourcepackage=None):
@@ -591,6 +584,10 @@
cls._new(distribution, sourcepackagename, upstream_link_allowed)
+class ThreadLocalLRUCache(LRUCache, local):
+ """A per-thread LRU cache."""
+
+
class DistributionSourcePackageInDatabase(Storm):
"""Temporary class to allow access to the database."""
@@ -627,3 +624,72 @@
releases = self.distribution.getCurrentSourceReleases(
[self.sourcepackagename])
return releases.get(self)
+
+ # This is a per-thread LRU cache of mappings from (distribution_id,
+ # sourcepackagename_id) to dsp_id. See get() for how this cache helps to
+ # avoid database hits without causing consistency issues.
+ _cache = ThreadLocalLRUCache(1000, 700)
+
+ @classmethod
+ def get(cls, distribution, sourcepackagename):
+ """Get a DSP given distribution and source package name.
+
+ Attempts to use a cached `(distro_id, spn_id) --> dsp_id` mapping to
+ avoid hitting the database.
+ """
+ # Check for a cached mapping from (distro_id, spn_id) to dsp_id.
+ dsp_cache_key = distribution.id, sourcepackagename.id
+ dsp_id = cls._cache.get(dsp_cache_key)
+ # If the mapping is not cached, fetch directly from the database.
+ if dsp_id is None:
+ return cls.getDirect(distribution, sourcepackagename)
+ # Try store.get(), allowing Storm to answer from cache if it can.
+ store = Store.of(distribution)
+ dsp = store.get(DistributionSourcePackageInDatabase, dsp_id)
+ # If it's not found, query the database; the mapping might be stale.
+ if dsp is None:
+ return cls.getDirect(distribution, sourcepackagename)
+ # Check that the mapping in the cache was correct.
+ if distribution.id != dsp.distribution_id:
+ return cls.getDirect(distribution, sourcepackagename)
+ if sourcepackagename.id != dsp.sourcepackagename_id:
+ return cls.getDirect(distribution, sourcepackagename)
+ # Cache hit, phew.
+ return dsp
+
+ @classmethod
+ def getDirect(cls, distribution, sourcepackagename):
+ """Get a DSP given distribution and source package name.
+
+ Caches the `(distro_id, spn_id) --> dsp_id` mapping, but does not
+ otherwise use the cache; it always goes to the database.
+ """
+ dsp = Store.of(distribution).find(
+ DistributionSourcePackageInDatabase,
+ DistributionSourcePackageInDatabase.sourcepackagename ==
+ sourcepackagename,
+ DistributionSourcePackageInDatabase.distribution ==
+ distribution).one()
+ dsp_cache_key = distribution.id, sourcepackagename.id
+ if dsp is not None:
+ # Cache the mapping only when a DSP was found; misses are not
+ # recorded because there is no way to eject entries from the cache.
+ cls._cache[dsp_cache_key] = dsp.id
+ return dsp
+
+ @classmethod
+ def new(cls, distribution, sourcepackagename,
+ is_upstream_link_allowed=False):
+ """Create a new DSP with the given parameters.
+
+ Caches the `(distro_id, spn_id) --> dsp_id` mapping.
+ """
+ dsp = DistributionSourcePackageInDatabase()
+ dsp.distribution = distribution
+ dsp.sourcepackagename = sourcepackagename
+ dsp.is_upstream_link_allowed = is_upstream_link_allowed
+ Store.of(distribution).add(dsp)
+ Store.of(distribution).flush()
+ dsp_cache_key = distribution.id, sourcepackagename.id
+ cls._cache[dsp_cache_key] = dsp.id
+ return dsp
=== modified file 'lib/lp/registry/tests/test_distributionsourcepackage.py'
--- lib/lp/registry/tests/test_distributionsourcepackage.py 2011-10-04 21:43:11 +0000
+++ lib/lp/registry/tests/test_distributionsourcepackage.py 2011-11-10 11:54:25 +0000
@@ -5,10 +5,13 @@
__metaclass__ = type
+from storm.store import Store
+from testtools.matchers import Equals
import transaction
from zope.component import getUtility
from zope.security.proxy import removeSecurityProxy
+from canonical.database.sqlbase import flush_database_updates
from canonical.launchpad.interfaces.lpstorm import IStore
from canonical.testing.layers import (
DatabaseFunctionalLayer,
@@ -22,7 +25,11 @@
from lp.registry.model.karma import KarmaTotalCache
from lp.soyuz.enums import PackagePublishingStatus
from lp.soyuz.tests.test_publishing import SoyuzTestPublisher
-from lp.testing import TestCaseWithFactory
+from lp.testing import (
+ StormStatementRecorder,
+ TestCaseWithFactory,
+ )
+from lp.testing.matchers import HasQueryCount
class TestDistributionSourcePackage(TestCaseWithFactory):
@@ -262,3 +269,174 @@
archive.name for archive in related_archives]
self.assertEqual(related_archive_names, ['gedit-beta'])
+
+
+class TestDistributionSourcePackageInDatabase(TestCaseWithFactory):
+
+ layer = DatabaseFunctionalLayer
+
+ def setUp(self):
+ super(TestDistributionSourcePackageInDatabase, self).setUp()
+ # We must reset the mapping cache so that tests start from scratch.
+ DistributionSourcePackageInDatabase._cache.clear()
+
+ def test_new(self):
+ # DistributionSourcePackageInDatabase.new() creates a new DSP, adds it
+ # to the store, and updates the mapping cache.
+ distribution = self.factory.makeDistribution()
+ sourcepackagename = self.factory.makeSourcePackageName()
+ dsp = DistributionSourcePackageInDatabase.new(
+ distribution, sourcepackagename)
+ self.assertIs(Store.of(distribution), Store.of(dsp))
+ self.assertEqual(
+ {(distribution.id, sourcepackagename.id): dsp.id},
+ DistributionSourcePackageInDatabase._cache.items())
+
+ def test_getDirect_not_found(self):
+ # DistributionSourcePackageInDatabase.getDirect() returns None if a
+ # DSP does not exist in the database. It does not modify the mapping
+ # cache.
+ distribution = self.factory.makeDistribution()
+ sourcepackagename = self.factory.makeSourcePackageName()
+ flush_database_updates()
+ with StormStatementRecorder() as recorder:
+ dsp = DistributionSourcePackageInDatabase.getDirect(
+ distribution, sourcepackagename)
+ self.assertIs(None, dsp)
+ self.assertThat(recorder, HasQueryCount(Equals(1)))
+ self.assertEqual(
+ {}, DistributionSourcePackageInDatabase._cache.items())
+
+ def test_getDirect_found(self):
+ # DistributionSourcePackageInDatabase.getDirect() returns the
+ # DSPInDatabase if one already exists in the database. It also adds
+ # the new mapping to the mapping cache.
+ distribution = self.factory.makeDistribution()
+ sourcepackagename = self.factory.makeSourcePackageName()
+ dsp = DistributionSourcePackageInDatabase.new(
+ distribution, sourcepackagename)
+ flush_database_updates()
+ with StormStatementRecorder() as recorder:
+ dsp_found = DistributionSourcePackageInDatabase.getDirect(
+ dsp.distribution, dsp.sourcepackagename)
+ self.assertIs(dsp, dsp_found)
+ self.assertThat(recorder, HasQueryCount(Equals(1)))
+ self.assertEqual(
+ {(distribution.id, sourcepackagename.id): dsp.id},
+ DistributionSourcePackageInDatabase._cache.items())
+
+ def test_get_not_cached_and_not_found(self):
+ # DistributionSourcePackageInDatabase.get() returns None if a DSP does
+ # not exist in the database and no mapping cache entry exists for
+ # it. It does not modify the mapping cache.
+ distribution = self.factory.makeDistribution()
+ sourcepackagename = self.factory.makeSourcePackageName()
+ flush_database_updates()
+ with StormStatementRecorder() as recorder:
+ dsp = DistributionSourcePackageInDatabase.get(
+ distribution, sourcepackagename)
+ self.assertIs(None, dsp)
+ self.assertThat(recorder, HasQueryCount(Equals(1)))
+ self.assertEqual(
+ {}, DistributionSourcePackageInDatabase._cache.items())
+
+ def test_get_cached_and_not_found(self):
+ # DistributionSourcePackageInDatabase.get() returns None if a DSP does
+ # not exist in the database for a stale mapping cache entry.
+ distribution = self.factory.makeDistribution()
+ sourcepackagename = self.factory.makeSourcePackageName()
+ # Enter a stale entry in the mapping cache.
+ stale_dsp_cache_key = distribution.id, sourcepackagename.id
+ DistributionSourcePackageInDatabase._cache[stale_dsp_cache_key] = -123
+ flush_database_updates()
+ with StormStatementRecorder() as recorder:
+ dsp = DistributionSourcePackageInDatabase.get(
+ distribution, sourcepackagename)
+ self.assertIs(None, dsp)
+ # A stale mapping means that we have to issue two queries: the first
+ # queries for the stale DSP from the database, the second gets the
+ # correct DSP (or None).
+ self.assertThat(recorder, HasQueryCount(Equals(2)))
+
+ def test_get_cached_and_not_found_with_bogus_dsp(self):
+ # DistributionSourcePackageInDatabase.get() returns None if a DSP does
+ # exist in the database for a mapping cache entry, but the DSP
+ # discovered does not match the mapping cache key.
+ distribution = self.factory.makeDistribution()
+ sourcepackagename = self.factory.makeSourcePackageName()
+ # Put a bogus entry into the mapping cache.
+ bogus_dsp = DistributionSourcePackageInDatabase.new(
+ distribution, self.factory.makeSourcePackageName())
+ bogus_dsp_cache_key = distribution.id, sourcepackagename.id
+ DistributionSourcePackageInDatabase._cache[
+ bogus_dsp_cache_key] = bogus_dsp.id
+ # Invalidate the bogus DSP from Storm's cache.
+ Store.of(bogus_dsp).invalidate(bogus_dsp)
+ flush_database_updates()
+ with StormStatementRecorder() as recorder:
+ dsp = DistributionSourcePackageInDatabase.get(
+ distribution, sourcepackagename)
+ self.assertIs(None, dsp)
+ # A stale mapping means that we have to issue two queries: the first
+ # gets the bogus DSP from the database, the second gets the correct
+ # DSP (or None).
+ self.assertThat(recorder, HasQueryCount(Equals(2)))
+
+ def test_get_cached_and_not_found_with_bogus_dsp_in_storm_cache(self):
+ # DistributionSourcePackageInDatabase.get() returns None if a DSP does
+ # exist in the database for a mapping cache entry, but the DSP
+ # discovered does not match the mapping cache key. This variant covers
+ # the case where the bogus DSP is already in Storm's cache.
+ distribution = self.factory.makeDistribution()
+ sourcepackagename = self.factory.makeSourcePackageName()
+ # Put a bogus entry into the mapping cache.
+ bogus_dsp = DistributionSourcePackageInDatabase.new(
+ distribution, self.factory.makeSourcePackageName())
+ bogus_dsp_cache_key = distribution.id, sourcepackagename.id
+ DistributionSourcePackageInDatabase._cache[
+ bogus_dsp_cache_key] = bogus_dsp.id
+ flush_database_updates()
+ with StormStatementRecorder() as recorder:
+ dsp = DistributionSourcePackageInDatabase.get(
+ distribution, sourcepackagename)
+ self.assertIs(None, dsp)
+ # A stale mapping means that we ordinarily have to issue two queries:
+ # the first gets the bogus DSP from the database, the second gets the
+ # correct DSP (or None). However, the bogus DSP is already in Storm's
+ # cache, so we issue only one query.
+ self.assertThat(recorder, HasQueryCount(Equals(1)))
+
+ def test_get_not_cached_and_found(self):
+ # DistributionSourcePackageInDatabase.get() returns the DSP if it's
+ # found in the database even if no mapping cache entry exists for
+ # it. It updates the mapping cache with this discovered information.
+ distribution = self.factory.makeDistribution()
+ sourcepackagename = self.factory.makeSourcePackageName()
+ dsp = DistributionSourcePackageInDatabase.new(
+ distribution, sourcepackagename)
+ # new() updates the mapping cache so we must clear it.
+ DistributionSourcePackageInDatabase._cache.clear()
+ flush_database_updates()
+ with StormStatementRecorder() as recorder:
+ dsp_found = DistributionSourcePackageInDatabase.get(
+ distribution, sourcepackagename)
+ self.assertIs(dsp, dsp_found)
+ self.assertThat(recorder, HasQueryCount(Equals(1)))
+ self.assertEqual(
+ {(distribution.id, sourcepackagename.id): dsp.id},
+ DistributionSourcePackageInDatabase._cache.items())
+
+ def test_get_cached_and_found(self):
+ # DistributionSourcePackageInDatabase.get() returns the DSP if it's
+ # found in the database from a good mapping cache entry.
+ distribution = self.factory.makeDistribution()
+ sourcepackagename = self.factory.makeSourcePackageName()
+ dsp = DistributionSourcePackageInDatabase.new(
+ distribution, sourcepackagename)
+ flush_database_updates()
+ with StormStatementRecorder() as recorder:
+ dsp_found = DistributionSourcePackageInDatabase.get(
+ distribution, sourcepackagename)
+ self.assertIs(dsp, dsp_found)
+ # Hurrah! This is what we're aiming for: a DSP that is in the mapping
+ # cache *and* in Storm's cache.
+ self.assertThat(recorder, HasQueryCount(Equals(0)))