← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~wgrant/launchpad/translation-sharing-cte into lp:launchpad

 

William Grant has proposed merging lp:~wgrant/launchpad/translation-sharing-cte into lp:launchpad.

Commit message:
Reimplement the guts of POTemplateSharingSubset as a recursive CTE, fixing it to use transitive relationships as intended.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~wgrant/launchpad/translation-sharing-cte/+merge/232502

Reimplement the guts of POFileSharingSubset as a recursive CTE. This will now correctly respect transitivity, and all members of a set will see the same set (things would previously get very confused when a Product was linked to multiple DistributionSourcePackages, eg. for sources with a version in the name).

I also added pretty thorough tests for POFileSharingSubset, as the old ones let it be terribly broken.
-- 
https://code.launchpad.net/~wgrant/launchpad/translation-sharing-cte/+merge/232502
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~wgrant/launchpad/translation-sharing-cte into lp:launchpad.
=== modified file 'lib/lp/translations/model/potemplate.py'
--- lib/lp/translations/model/potemplate.py	2014-07-01 14:32:32 +0000
+++ lib/lp/translations/model/potemplate.py	2014-08-28 04:41:43 +0000
@@ -30,19 +30,14 @@
 from storm.expr import (
     And,
     Desc,
-    Func,
-    In,
     Join,
     LeftJoin,
     Or,
-    Select,
     SQL,
     )
 from storm.info import ClassAlias
-from storm.store import (
-    EmptyResultSet,
-    Store,
-    )
+from storm.properties import Int
+from storm.store import Store
 from zope.component import (
     getAdapter,
     getUtility,
@@ -54,7 +49,6 @@
 from lp.app.errors import NotFoundError
 from lp.app.interfaces.launchpad import ILaunchpadCelebrities
 from lp.registry.interfaces.person import validate_public_person
-from lp.registry.model.packaging import Packaging
 from lp.registry.model.sourcepackagename import SourcePackageName
 from lp.services.database.bulk import load_related
 from lp.services.database.collection import Collection
@@ -113,6 +107,42 @@
     )
 
 
+sharing_subset_cte = """
+RECURSIVE location(product, distribution, sourcepackagename) AS (
+    VALUES (?::integer, ?::integer, ?::integer)
+  UNION
+    SELECT
+        ps_from_packaging.product, ds_from_packaging.distribution,
+        packaging_from_ps.sourcepackagename
+    FROM
+        location
+        -- Find a (Distribution, SPN) via a Packaging from a Product.
+        LEFT JOIN (
+            productseries AS ps_from_location
+            JOIN packaging AS packaging_from_ps
+                ON packaging_from_ps.productseries = ps_from_location.id
+            JOIN distroseries AS ds_from_packaging
+                ON ds_from_packaging.id = packaging_from_ps.distroseries
+
+            ) ON ps_from_location.product = location.product
+        -- Find a Product via a Packaging from a (Distribution, SPN).
+        LEFT JOIN (
+            distroseries AS ds_from_location
+            JOIN packaging AS packaging_from_ds
+                ON packaging_from_ds.distroseries = ds_from_location.id
+            JOIN productseries AS ps_from_packaging
+                ON ps_from_packaging.id = packaging_from_ds.productseries
+            ) ON (
+                ds_from_location.distribution = location.distribution
+                AND packaging_from_ds.sourcepackagename =
+                    location.sourcepackagename)
+    WHERE
+        -- All rows must have a target.
+        (ps_from_packaging.product IS NOT NULL
+         OR packaging_from_ps.distroseries IS NOT NULL))
+"""
+
+
 log = logging.getLogger(__name__)
 
 
@@ -1423,102 +1453,82 @@
             package = template.sourcepackagename.name
         return (template.name, package)
 
-    def _queryByProduct(self, templatename_clause):
-        """Build the query that finds POTemplates by their linked product.
-
-        Queries the Packaging table to find templates in linked source
-        packages, too.
-
-        :param templatename_clause: A string or a storm expression to
-        add to the where clause of the query that will select the template
-        name.
-        :return: A ResultSet for the query.
-        """
-        # Avoid circular imports.
-        from lp.registry.model.distroseries import DistroSeries
-        from lp.registry.model.productseries import ProductSeries
-
-        subquery = Select(
-            (DistroSeries.distributionID, Packaging.sourcepackagenameID),
-            tables=(Packaging, ProductSeries, DistroSeries),
-            where=And(
-                Packaging.productseriesID == ProductSeries.id,
-                Packaging.distroseriesID == DistroSeries.id,
-                ProductSeries.product == self.product),
-            distinct=True)
-        origin = LeftJoin(
-            LeftJoin(
-                POTemplate, ProductSeries,
-                POTemplate.productseriesID == ProductSeries.id),
-            DistroSeries,
-            POTemplate.distroseriesID == DistroSeries.id)
-
-        return Store.of(self.product).using(origin).find(
-            POTemplate,
-            And(
-                templatename_clause,
-                Or(
-                  ProductSeries.product == self.product,
-                  In(
-                     Func(
-                         'ROW',
-                         DistroSeries.distributionID,
-                         POTemplate.sourcepackagenameID),
-                     subquery))))
-
-    def _queryBySourcepackagename(self, templatename_clause):
-        """Build the query that finds POTemplates by their names.
-
-        :param templatename_clause: A string or a storm expression to
-        add to the where clause of the query that will select the template
-        name.
-        :return: A ResultSet for the query.
-        """
-        # Avoid circular imports.
-        from lp.registry.model.distroseries import DistroSeries
-        from lp.registry.model.productseries import ProductSeries
-
-        subquery = Select(
-            ProductSeries.productID,
-            tables=(Packaging, ProductSeries, DistroSeries),
-            where=And(
-                Packaging.productseriesID == ProductSeries.id,
-                Packaging.distroseriesID == DistroSeries.id,
-                DistroSeries.distribution == self.distribution,
-                Packaging.sourcepackagename == self.sourcepackagename),
-            distinct=True)
-        origin = LeftJoin(
-            LeftJoin(
-                POTemplate, ProductSeries,
-                POTemplate.productseriesID == ProductSeries.id),
-            DistroSeries,
-            POTemplate.distroseriesID == DistroSeries.id)
-
-        return Store.of(self.distribution).using(origin).find(
-            POTemplate,
-            And(
-                templatename_clause,
-                Or(
-                  And(
-                    DistroSeries.distribution == self.distribution,
-                    POTemplate.sourcepackagename == self.sourcepackagename),
-                  In(ProductSeries.productID, subquery))))
-
-    def _queryByDistribution(self, templatename_clause):
-        """Special case when templates are searched across a distribution."""
-        return Store.of(self.distribution).find(
-            POTemplate, templatename_clause)
-
     def _queryPOTemplates(self, templatename_clause):
-        """Select the right query to be used."""
+        """Find all POTemplates in this sharing subset."""
+        from lp.registry.model.distroseries import DistroSeries
+        from lp.registry.model.productseries import ProductSeries
+        product_id = distro_id = spn_id = None
         if self.product is not None:
-            return self._queryByProduct(templatename_clause)
-        elif self.sourcepackagename is not None:
-            return self._queryBySourcepackagename(templatename_clause)
-        elif self.distribution is not None and self.sourcepackagename is None:
-            return self._queryByDistribution(templatename_clause)
+            product_id = self.product.id
         else:
-            return EmptyResultSet()
+            distro_id = self.distribution.id
+            spn_id = self.sourcepackagename.id
+
+        # Fake Storm table class for the CTE. The PK is a lie.
+        class Location:
+            __storm_table__ = 'location'
+            __storm_primary__ = (
+                'product_id', 'distribution_id', 'sourcepackagename_id')
+            product_id = Int(name='product')
+            distribution_id = Int(name='distribution')
+            sourcepackagename_id = Int(name='sourcepackagename')
+
+        # Message sharing is by necessity symmetric and transitive,
+        # since the POTMsgSets records themselves are shared. Two
+        # relationships are defined to cause templates to share:
+        #
+        #  - A template shares with identically named templates in other
+        #    series in its own location (other ProductSeries in the same
+        #    Product, or other SourcePackages in the same
+        #    DistributionSourcePackage).
+        #
+        #  - A template shares with identically named templates on the
+        #    other side of a Packaging record (in a ProductSeries for a
+        #    SourcePackage template, or a SourcePackage for a
+        #    ProductSeries template).
+        #
+        # To avoid surprising changes when a new template is created
+        # somewhere, and to simplify implementation, we make the second
+        # relationship slightly more liberal: Packagings directly link
+        # entire Products and DistributionSourcePackages, not just the
+        # specified series.
+        #
+        # The only practical change is that templates can be shared
+        # between locations even if a similarly named template doesn't
+        # exist in the exact series specified by the Packaging. With
+        # transitivity but without this extension, two big existing
+        # sharing subsets might end up being joined later on just
+        # because a template was copied from an unlinked series to a
+        # linked one.
+        #
+        # XXX wgrant 2014-08-28: An outdated version of these rules is
+        # reimplemented in TranslationSplitter, but TranslationSplitter
+        # is terribly broken (see XXXs in its module).
+
+        # The recursive CTE finds all Products and
+        # DistributionSourcePackages that are transitively linked by
+        # Packagings, starting with the location that we have. We then
+        # join out to their series and find any templates with the right
+        # name.
+        context = IStore(POTemplate).with_(
+            SQL(sharing_subset_cte, params=(product_id, distro_id, spn_id)))
+        return context.using(
+            Location,
+            LeftJoin(
+                DistroSeries,
+                DistroSeries.distributionID == Location.distribution_id),
+            LeftJoin(
+                ProductSeries,
+                ProductSeries.productID == Location.product_id),
+            Join(
+                POTemplate,
+                And(
+                    Or(
+                        POTemplate.productseriesID == ProductSeries.id,
+                        And(POTemplate.distroseriesID == DistroSeries.id,
+                            POTemplate.sourcepackagenameID ==
+                                Location.sourcepackagename_id)),
+                    templatename_clause))).find(POTemplate)
 
     def getSharingPOTemplates(self, potemplate_name):
         """See `IPOTemplateSharingSubset`."""

=== modified file 'lib/lp/translations/tests/test_potemplate.py'
--- lib/lp/translations/tests/test_potemplate.py	2013-10-21 04:58:19 +0000
+++ lib/lp/translations/tests/test_potemplate.py	2014-08-28 04:41:43 +0000
@@ -336,14 +336,16 @@
             name='foo')
 
         subset = getUtility(IPOTemplateSet).getSharingSubset(
-            distribution=self.ubuntu)
+            distribution=self.ubuntu, sourcepackagename=self.package)
+        other_subset = getUtility(IPOTemplateSet).getSharingSubset(
+            distribution=self.ubuntu, sourcepackagename=other_package)
         classes = subset.groupEquivalentPOTemplates()
+        other_classes = other_subset.groupEquivalentPOTemplates()
 
-        self.assertTrue(('foo', self.package.name) in classes)
-        self.assertEqual(classes[('foo', self.package.name)], [our_template])
-        self.assertTrue(('foo', other_package.name) in classes)
-        self.assertEqual(
-            classes[('foo', other_package.name)], [other_template])
+        self.assertEqual(
+            {('foo', self.package.name): [our_template]}, classes)
+        self.assertEqual(
+            {('foo', other_package.name): [other_template]}, other_classes)
 
     def test_EquivalenceByNamePattern(self):
         # We can obtain equivalence classes for a distribution by
@@ -355,7 +357,7 @@
             name=unique_name)
 
         subset = getUtility(IPOTemplateSet).getSharingSubset(
-            distribution=self.ubuntu)
+            distribution=self.ubuntu, sourcepackagename=self.package)
         classes = subset.groupEquivalentPOTemplates(
             name_pattern=u'krungthepmahanakorn.*-etc')
 
@@ -703,6 +705,216 @@
             sourcepackage=self.sourcepackage, name=self.shared_template_name)
 
 
+class TestPOTemplateSharingSubset(TestCaseWithFactory):
+    """Test that POTemplateSharingSubset consistently calculates sharing sets.
+
+    Message sharing must be a symmetric and transitive relation. This
+    set of tests verifies that an identical set is calculated for any
+    member of the set.
+    """
+
+    layer = DatabaseFunctionalLayer
+
+    def setUp(self):
+        super(TestPOTemplateSharingSubset, self).setUp()
+        self.p1 = self.factory.makeProduct()
+        self.p1s1 = self.factory.makeProductSeries(product=self.p1)
+        self.p1s2 = self.factory.makeProductSeries(product=self.p1)
+        self.p2 = self.factory.makeProduct()
+        self.p2s1 = self.factory.makeProductSeries(product=self.p2)
+        self.p2s2 = self.factory.makeProductSeries(product=self.p2)
+
+        self.d1 = self.factory.makeDistribution()
+        self.d1s1 = self.factory.makeDistroSeries(distribution=self.d1)
+        self.d1s2 = self.factory.makeDistroSeries(distribution=self.d1)
+        self.d2 = self.factory.makeDistribution()
+        self.d2s1 = self.factory.makeDistroSeries(distribution=self.d2)
+        self.d2s2 = self.factory.makeDistroSeries(distribution=self.d2)
+
+        self.spn1 = self.factory.makeSourcePackageName('package1')
+        self.spn2 = self.factory.makeSourcePackageName('package2')
+
+        self.pots = {}
+        for ps in (self.p1s1, self.p1s2, self.p2s1, self.p2s2):
+            for name in ('template1', 'template2'):
+                self.pots[(ps, name)] = self.factory.makePOTemplate(
+                    productseries=ps, name=name)
+        for ds in (self.d1s1, self.d1s2, self.d2s1, self.d2s2):
+            for spn in (self.spn1, self.spn2):
+                for name in ('template1', 'template2'):
+                    self.pots[(ds, spn, name)] = self.factory.makePOTemplate(
+                        distroseries=ds, sourcepackagename=spn, name=name)
+
+    def assertSelfContained(self, specs):
+        """Check that a set of templates is mutually sharing.
+
+        Each template in the given set must share with all the other
+        templates in the set, and none outside it.
+        """
+        pots = [self.pots[spec] for spec in specs]
+        for pot in pots:
+            subset = getUtility(IPOTemplateSet).getSharingSubset(
+                product=pot.product, distribution=pot.distribution,
+                sourcepackagename=pot.sourcepackagename)
+            self.assertContentEqual(
+                pots, subset.getSharingPOTemplates(pot.name))
+
+    def test_unlinked(self):
+        self.assertSelfContained([
+            (self.p1s1, 'template1'), (self.p1s2, 'template1')])
+        self.assertSelfContained([
+            (self.p1s1, 'template2'), (self.p1s2, 'template2')])
+        self.assertSelfContained([
+            (self.d1s1, self.spn1, 'template1'),
+            (self.d1s2, self.spn1, 'template1')])
+        self.assertSelfContained([
+            (self.d1s1, self.spn1, 'template2'),
+            (self.d1s2, self.spn1, 'template2')])
+        self.assertSelfContained([
+            (self.d1s1, self.spn2, 'template1'),
+            (self.d1s2, self.spn2, 'template1')])
+
+    def test_product_linked_to_distro(self):
+        # Linking a ProductSeries and a SourcePackage with a Packaging
+        # causes the templates on each side to share.
+        # Merge p1 and (d1, spn1).
+        self.factory.makePackagingLink(
+            productseries=self.p1s1, distroseries=self.d1s1,
+            sourcepackagename=self.spn1)
+
+        # template1 and template2 in all series of p1 and d1's spn1
+        # package are shared.
+        self.assertSelfContained([
+            (self.p1s1, 'template1'), (self.p1s2, 'template1'),
+            (self.d1s1, self.spn1, 'template1'),
+            (self.d1s2, self.spn1, 'template1')])
+        self.assertSelfContained([
+            (self.p1s1, 'template2'), (self.p1s2, 'template2'),
+            (self.d1s1, self.spn1, 'template2'),
+            (self.d1s2, self.spn1, 'template2')])
+
+        # But p2, d1's spn2, and d2's spn1 are all still separate.
+        self.assertSelfContained([
+            (self.p2s1, 'template1'), (self.p2s2, 'template1')])
+        self.assertSelfContained([
+            (self.d1s1, self.spn2, 'template1'),
+            (self.d1s2, self.spn2, 'template1')])
+        self.assertSelfContained([
+            (self.d2s1, self.spn1, 'template1'),
+            (self.d2s2, self.spn1, 'template1')])
+
+    def test_product_linked_to_two_distros(self):
+        # Multiple Packaging links extend the sharing domain further.
+        # Merge p1, (d1, spn1) and (d2, spn1).
+        self.factory.makePackagingLink(
+            productseries=self.p1s1, distroseries=self.d1s1,
+            sourcepackagename=self.spn1)
+        self.factory.makePackagingLink(
+            productseries=self.p1s1, distroseries=self.d2s2,
+            sourcepackagename=self.spn1)
+
+        # template1 and template2 in all series of p1, (d1, spn1) and
+        # (d2, spn1) are all shared.
+        self.assertSelfContained([
+            (self.p1s1, 'template1'), (self.p1s2, 'template1'),
+            (self.d1s1, self.spn1, 'template1'),
+            (self.d1s2, self.spn1, 'template1'),
+            (self.d2s1, self.spn1, 'template1'),
+            (self.d2s2, self.spn1, 'template1')])
+        self.assertSelfContained([
+            (self.p1s1, 'template2'), (self.p1s2, 'template2'),
+            (self.d1s1, self.spn1, 'template2'),
+            (self.d1s2, self.spn1, 'template2'),
+            (self.d2s1, self.spn1, 'template2'),
+            (self.d2s2, self.spn1, 'template2')])
+
+        # But p2, (d1, spn2), and (d2, spn2) are all still separate.
+        self.assertSelfContained([
+            (self.p2s1, 'template1'), (self.p2s2, 'template1')])
+        self.assertSelfContained([
+            (self.d1s1, self.spn2, 'template1'),
+            (self.d1s2, self.spn2, 'template1')])
+        self.assertSelfContained([
+            (self.d2s1, self.spn2, 'template1'),
+            (self.d2s2, self.spn2, 'template1')])
+
+    def test_product_linked_to_different_packages_in_two_distros(self):
+        # Packaging records' SourcePackageNames are respected.
+        # Merge p1, (d1, spn1) and (d2, spn2).
+        self.factory.makePackagingLink(
+            productseries=self.p1s1, distroseries=self.d1s1,
+            sourcepackagename=self.spn1)
+        self.factory.makePackagingLink(
+            productseries=self.p1s1, distroseries=self.d2s2,
+            sourcepackagename=self.spn2)
+
+        # template1 and template2 in all series of p1, (d1, spn1) and
+        # (d2, spn2) are all shared.
+        self.assertSelfContained([
+            (self.p1s1, 'template1'), (self.p1s2, 'template1'),
+            (self.d1s1, self.spn1, 'template1'),
+            (self.d1s2, self.spn1, 'template1'),
+            (self.d2s1, self.spn2, 'template1'),
+            (self.d2s2, self.spn2, 'template1')])
+        self.assertSelfContained([
+            (self.p1s1, 'template2'), (self.p1s2, 'template2'),
+            (self.d1s1, self.spn1, 'template2'),
+            (self.d1s2, self.spn1, 'template2'),
+            (self.d2s1, self.spn2, 'template2'),
+            (self.d2s2, self.spn2, 'template2')])
+
+        # But p2, (d1, spn2), and (d2, spn1) are all still separate.
+        self.assertSelfContained([
+            (self.p2s1, 'template1'), (self.p2s2, 'template1')])
+        self.assertSelfContained([
+            (self.d1s1, self.spn2, 'template1'),
+            (self.d1s2, self.spn2, 'template1')])
+        self.assertSelfContained([
+            (self.d2s1, self.spn1, 'template1'),
+            (self.d2s2, self.spn1, 'template1')])
+
+    def test_multiple_products_interlinked(self):
+        # In a contrived scenario there can be multiple Products
+        # involved, by linking different SourcePackages for the same
+        # Distribution and SourcePackageName to ProductSeries in
+        # different Products. This combines those sharing subsets as
+        # expected.
+        self.factory.makePackagingLink(
+            productseries=self.p1s1, distroseries=self.d1s1,
+            sourcepackagename=self.spn1)
+        self.factory.makePackagingLink(
+            productseries=self.p1s1, distroseries=self.d2s1,
+            sourcepackagename=self.spn2)
+        self.factory.makePackagingLink(
+            productseries=self.p2s1, distroseries=self.d2s2,
+            sourcepackagename=self.spn2)
+
+        # template1 and template2 in all series of p1, p2, (d1, spn1)
+        # and (d2, spn2) are all shared.
+        self.assertSelfContained([
+            (self.p1s1, 'template1'), (self.p1s2, 'template1'),
+            (self.p2s1, 'template1'), (self.p2s2, 'template1'),
+            (self.d1s1, self.spn1, 'template1'),
+            (self.d1s2, self.spn1, 'template1'),
+            (self.d2s1, self.spn2, 'template1'),
+            (self.d2s2, self.spn2, 'template1')])
+        self.assertSelfContained([
+            (self.p1s1, 'template2'), (self.p1s2, 'template2'),
+            (self.p2s1, 'template2'), (self.p2s2, 'template2'),
+            (self.d1s1, self.spn1, 'template2'),
+            (self.d1s2, self.spn1, 'template2'),
+            (self.d2s1, self.spn2, 'template2'),
+            (self.d2s2, self.spn2, 'template2')])
+
+        # But (d1, spn2) and (d2, spn1) remain isolated.
+        self.assertSelfContained([
+            (self.d1s1, self.spn2, 'template1'),
+            (self.d1s2, self.spn2, 'template1')])
+        self.assertSelfContained([
+            (self.d2s1, self.spn1, 'template1'),
+            (self.d2s2, self.spn1, 'template1')])
+
+
 class TestPOTemplateSubset(TestCaseWithFactory):
     """Test POTemplate functions not covered by doctests."""
 


Follow ups