launchpad-reviewers team mailing list archive

Thread
Date

[Merge] lp:~wgrant/launchpad/indices-to-archivepublisher into lp:launchpad

To: mp+240260@xxxxxxxxxxxxxxxxxx
From: William Grant <me@xxxxxxxxxxxxxxxxxx>
Date: Fri, 31 Oct 2014 13:23:26 -0000
Reply-to: mp+240260@xxxxxxxxxxxxxxxxxx
Sender: bounces@xxxxxxxxxxxxx

William Grant has proposed merging lp:~wgrant/launchpad/indices-to-archivepublisher into lp:launchpad.

Commit message:
Move the index stanza build methods from the publishing classes to lp.archivepublisher.indices.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~wgrant/launchpad/indices-to-archivepublisher/+merge/240260

Despite living in the 2000-line lp.soyuz.model.publishing module, the apt index stanza generation methods are used by a single callsite each, all the way over in lp.archivepublisher.publishing. Let's move them nearby to try to make things a bit less monolithic.
-- 
https://code.launchpad.net/~wgrant/launchpad/indices-to-archivepublisher/+merge/240260
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~wgrant/launchpad/indices-to-archivepublisher into lp:launchpad.

=== added file 'lib/lp/archivepublisher/indices.py'
--- lib/lp/archivepublisher/indices.py	1970-01-01 00:00:00 +0000
+++ lib/lp/archivepublisher/indices.py	2014-10-31 13:23:01 +0000
@@ -0,0 +1,236 @@
+# Copyright 2009-2014 Canonical Ltd.  This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+__all__ = [
+    'IndexStanzaFields',
+    'build_binary_stanza_fields',
+    'build_source_stanza_fields',
+    'build_translations_stanza_fields',
+    ]
+
+__metaclass__ = type
+
+import hashlib
+import os.path
+import re
+
+from lp.soyuz.model.publishing import makePoolPath
+
+
+class IndexStanzaFields:
+    """Store and format ordered Index Stanza fields."""
+
+    def __init__(self):
+        self._names_lower = set()
+        self.fields = []
+
+    def append(self, name, value):
+        """Append an (field, value) tuple to the internal list.
+
+        Then we can use the FIFO-like behaviour in makeOutput().
+        """
+        if name.lower() in self._names_lower:
+            return
+        self._names_lower.add(name.lower())
+        self.fields.append((name, value))
+
+    def extend(self, entries):
+        """Extend the internal list with the key-value pairs in entries.
+        """
+        for name, value in entries:
+            self.append(name, value)
+
+    def makeOutput(self):
+        """Return a line-by-line aggregation of appended fields.
+
+        Empty field values will cause the exclusion of the field.
+        The output order will preserve the insertion order, FIFO.
+        """
+        output_lines = []
+        for name, value in self.fields:
+            if not value:
+                continue
+
+            # do not add separation space for the special file list fields.
+            if name not in ('Files', 'Checksums-Sha1', 'Checksums-Sha256'):
+                value = ' %s' % value
+
+            # XXX Michael Nelson 20090930 bug=436182. We have an issue
+            # in the upload parser that has
+            #   1. introduced '\n' at the end of multiple-line-spanning
+            #      fields, such as dsc_binaries, but potentially others,
+            #   2. stripped the leading space from each subsequent line
+            #      of dsc_binaries values that span multiple lines.
+            # This is causing *incorrect* Source indexes to be created.
+            # This work-around can be removed once the fix for bug 436182
+            # is in place and the tainted data has been cleaned.
+            # First, remove any trailing \n or spaces.
+            value = value.rstrip()
+
+            # Second, as we have corrupt data where subsequent lines
+            # of values spanning multiple lines are not preceded by a
+            # space, we ensure that any \n in the value that is *not*
+            # followed by a white-space character has a space inserted.
+            value = re.sub(r"\n(\S)", r"\n \1", value)
+
+            output_lines.append('%s:%s' % (name, value))
+
+        return '\n'.join(output_lines)
+
+
+def format_file_list(l):
+    return ''.join('\n %s %s %s' % ((h,) + f) for (h, f) in l)
+
+
+def format_description(summary, description):
+    # description field in index is an association of summary and
+    # description or the summary only if include_long_descriptions
+    # is false, as:
+    #
+    # Description: <SUMMARY>\n
+    #  <DESCRIPTION L1>
+    #  ...
+    #  <DESCRIPTION LN>
+    descr_lines = [line.lstrip() for line in description.splitlines()]
+    bin_description = '%s\n %s' % (summary, '\n '.join(descr_lines))
+    return bin_description
+
+
+def build_source_stanza_fields(spr, component, section):
+    """See `IPublishing`."""
+    # Special fields preparation.
+    pool_path = makePoolPath(spr.name, component.name)
+    files_list = []
+    sha1_list = []
+    sha256_list = []
+    for spf in spr.files:
+        common = (
+            spf.libraryfile.content.filesize, spf.libraryfile.filename)
+        files_list.append((spf.libraryfile.content.md5, common))
+        sha1_list.append((spf.libraryfile.content.sha1, common))
+        sha256_list.append((spf.libraryfile.content.sha256, common))
+    # Filling stanza options.
+    fields = IndexStanzaFields()
+    fields.append('Package', spr.name)
+    fields.append('Binary', spr.dsc_binaries)
+    fields.append('Version', spr.version)
+    fields.append('Section', section.name)
+    fields.append('Maintainer', spr.dsc_maintainer_rfc822)
+    fields.append('Build-Depends', spr.builddepends)
+    fields.append('Build-Depends-Indep', spr.builddependsindep)
+    fields.append('Build-Conflicts', spr.build_conflicts)
+    fields.append('Build-Conflicts-Indep', spr.build_conflicts_indep)
+    fields.append('Architecture', spr.architecturehintlist)
+    fields.append('Standards-Version', spr.dsc_standards_version)
+    fields.append('Format', spr.dsc_format)
+    fields.append('Directory', pool_path)
+    fields.append('Files', format_file_list(files_list))
+    fields.append('Checksums-Sha1', format_file_list(sha1_list))
+    fields.append('Checksums-Sha256', format_file_list(sha256_list))
+    fields.append('Homepage', spr.homepage)
+    if spr.user_defined_fields:
+        fields.extend(spr.user_defined_fields)
+
+    return fields
+
+
+def build_binary_stanza_fields(bpr, component, section, priority,
+                               phased_update_percentage,
+                               separate_long_descriptions=False):
+    """See `IPublishing`."""
+    spr = bpr.build.source_package_release
+
+    # binaries have only one file, the DEB
+    bin_file = bpr.files[0]
+    bin_filename = bin_file.libraryfile.filename
+    bin_size = bin_file.libraryfile.content.filesize
+    bin_md5 = bin_file.libraryfile.content.md5
+    bin_sha1 = bin_file.libraryfile.content.sha1
+    bin_sha256 = bin_file.libraryfile.content.sha256
+    bin_filepath = os.path.join(
+        makePoolPath(spr.name, component.name), bin_filename)
+    description = format_description(bpr.summary, bpr.description)
+    # Our formatted description isn't \n-terminated, but apt
+    # considers the trailing \n to be part of the data to hash.
+    bin_description_md5 = hashlib.md5(
+        description.encode('utf-8') + '\n').hexdigest()
+    if separate_long_descriptions:
+        # If distroseries.include_long_descriptions is False, the
+        # description should be the summary
+        bin_description = bpr.summary
+    else:
+        bin_description = description
+
+    # Dealing with architecturespecific field.
+    # Present 'all' in every archive index for architecture
+    # independent binaries.
+    if bpr.architecturespecific:
+        architecture = bpr.build.distro_arch_series.architecturetag
+    else:
+        architecture = 'all'
+
+    essential = None
+    if bpr.essential:
+        essential = 'yes'
+
+    source = None
+    if bpr.version != spr.version:
+        source = '%s (%s)' % (spr.name, spr.version)
+    elif bpr.name != spr.name:
+        source = spr.name
+
+    fields = IndexStanzaFields()
+    fields.append('Package', bpr.name)
+    fields.append('Source', source)
+    fields.append('Priority', priority.title.lower())
+    fields.append('Section', section.name)
+    fields.append('Installed-Size', bpr.installedsize)
+    fields.append('Maintainer', spr.dsc_maintainer_rfc822)
+    fields.append('Architecture', architecture)
+    fields.append('Version', bpr.version)
+    fields.append('Recommends', bpr.recommends)
+    fields.append('Replaces', bpr.replaces)
+    fields.append('Suggests', bpr.suggests)
+    fields.append('Provides', bpr.provides)
+    fields.append('Depends', bpr.depends)
+    fields.append('Conflicts', bpr.conflicts)
+    fields.append('Pre-Depends', bpr.pre_depends)
+    fields.append('Enhances', bpr.enhances)
+    fields.append('Breaks', bpr.breaks)
+    fields.append('Essential', essential)
+    fields.append('Filename', bin_filepath)
+    fields.append('Size', bin_size)
+    fields.append('MD5sum', bin_md5)
+    fields.append('SHA1', bin_sha1)
+    fields.append('SHA256', bin_sha256)
+    fields.append('Phased-Update-Percentage', phased_update_percentage)
+    fields.append('Description', bin_description)
+    if separate_long_descriptions:
+        fields.append('Description-md5', bin_description_md5)
+    if bpr.user_defined_fields:
+        fields.extend(bpr.user_defined_fields)
+
+    # XXX cprov 2006-11-03: the extra override fields (Bugs, Origin and
+    # Task) included in the template were not populated.
+    # When we have the information this will be the place to fill them.
+
+    return fields
+
+
+def build_translations_stanza_fields(bpr, packages):
+    """See `IPublishing`."""
+    bin_description = format_description(bpr.summary, bpr.description)
+    # Our formatted description isn't \n-terminated, but apt
+    # considers the trailing \n to be part of the data to hash.
+    bin_description_md5 = hashlib.md5(
+        bin_description.encode('utf-8') + '\n').hexdigest()
+    if (bpr.name, bin_description_md5) not in packages:
+        fields = IndexStanzaFields()
+        fields.append('Package', bpr.name)
+        fields.append('Description-md5', bin_description_md5)
+        fields.append('Description-en', bin_description)
+        packages.add((bpr.name, bin_description_md5))
+
+        return fields
+    else:
+        return None

=== modified file 'lib/lp/archivepublisher/publishing.py'
--- lib/lp/archivepublisher/publishing.py	2014-10-29 14:08:19 +0000
+++ lib/lp/archivepublisher/publishing.py	2014-10-31 13:23:01 +0000
@@ -39,6 +39,11 @@
     write_htaccess,
     write_htpasswd,
     )
+from lp.archivepublisher.indices import (
+    build_binary_stanza_fields,
+    build_source_stanza_fields,
+    build_translations_stanza_fields,
+    )
 from lp.archivepublisher.interfaces.archivesigningkey import (
     IArchiveSigningKey,
     )
@@ -645,8 +650,9 @@
         if (not distroseries.include_long_descriptions and
                 getFeatureFlag("soyuz.ppa.separate_long_descriptions")):
             # If include_long_descriptions is False and the feature flag is
-            # enabled, create a Translation-en file. getIndexStanza() will
-            # also omit long descriptions from the Packages.
+            # enabled, create a Translation-en file.
+            # build_binary_stanza_fields will also omit long descriptions
+            # from the Packages.
             separate_long_descriptions = True
             packages = set()
             translation_en = RepositoryIndexFile(
@@ -660,8 +666,9 @@
 
         for spp in distroseries.getSourcePackagePublishing(
                 pocket, component, self.archive):
-            stanza = spp.getIndexStanza().encode('utf8') + '\n\n'
-            source_index.write(stanza)
+            stanza = build_source_stanza_fields(
+                spp.sourcepackagerelease, spp.component, spp.section)
+            source_index.write(stanza.makeOutput().encode('utf-8') + '\n\n')
 
         source_index.close()
 
@@ -693,19 +700,24 @@
                     # for, eg. ddebs where publish_debug_symbols is
                     # disabled.
                     continue
-                stanza = bpp.getIndexStanza(separate_long_descriptions).encode(
-                    'utf-8') + '\n\n'
-                indices[subcomp].write(stanza)
+                stanza = build_binary_stanza_fields(
+                    bpp.binarypackagerelease, bpp.component, bpp.section,
+                    bpp.priority, bpp.phased_update_percentage,
+                    separate_long_descriptions)
+                indices[subcomp].write(
+                    stanza.makeOutput().encode('utf-8') + '\n\n')
                 if separate_long_descriptions:
-                    # If the (Package, Description-md5) pair already exists in
-                    # the set, getTranslationsStanza will return None.
-                    # Otherwise it will add the pair to the set and return a
-                    # stanza to be written to Translation-en.
-                    translation_stanza = bpp.getTranslationsStanza(packages)
-                    if translation_stanza:
-                        translation_stanza = translation_stanza.encode(
-                            'utf-8') + '\n\n'
-                        translation_en.write(translation_stanza)
+                    # If the (Package, Description-md5) pair already exists
+                    # in the set, build_translations_stanza_fields will
+                    # return None. Otherwise it will add the pair to
+                    # the set and return a stanza to be written to
+                    # Translation-en.
+                    translation_stanza = build_translations_stanza_fields(
+                        bpp.binarypackagerelease, packages)
+                    if translation_stanza is not None:
+                        translation_en.write(
+                            translation_stanza.makeOutput().encode('utf-8')
+                            + '\n\n')
 
             for index in indices.itervalues():
                 index.close()

=== renamed file 'lib/lp/soyuz/tests/test_publish_archive_indexes.py' => 'lib/lp/archivepublisher/tests/test_indices.py'
--- lib/lp/soyuz/tests/test_publish_archive_indexes.py	2013-06-18 05:40:26 +0000
+++ lib/lp/archivepublisher/tests/test_indices.py	2014-10-31 13:23:01 +0000
@@ -9,10 +9,27 @@
 
 import apt_pkg
 
-from lp.soyuz.model.publishing import IndexStanzaFields
+from lp.archivepublisher.indices import (
+    build_binary_stanza_fields,
+    build_source_stanza_fields,
+    IndexStanzaFields,
+    )
 from lp.soyuz.tests.test_publishing import TestNativePublishingBase
 
 
+def build_bpph_stanza(bpph):
+    return build_binary_stanza_fields(
+        bpph.binarypackagerelease, bpph.component, bpph.section,
+        bpph.priority, bpph.phased_update_percentage,
+        False)
+
+
+def build_spph_stanza(spph):
+    return build_source_stanza_fields(
+        spph.sourcepackagerelease, spph.component,
+        spph.section)
+
+
 def get_field(stanza_fields, name):
     return dict(stanza_fields.fields).get(name)
 
@@ -70,7 +87,7 @@
              u'Checksums-Sha256:',
              u' %s 28 foo_666.dsc' % self.dsc_sha256,
              ],
-            pub_source.getIndexStanza().splitlines())
+            build_spph_stanza(pub_source).makeOutput().splitlines())
 
     def testSourceStanzaCustomFields(self):
         """Check just-created source publication Index stanza
@@ -108,7 +125,7 @@
              u'Checksums-Sha256:',
              u' %s 28 foo_666.dsc' % self.dsc_sha256,
              u'Python-Version: < 1.5'],
-            pub_source.getIndexStanza().splitlines())
+            build_spph_stanza(pub_source).makeOutput().splitlines())
 
     def testBinaryStanza(self):
         """Check just-created binary publication Index stanza.
@@ -149,7 +166,7 @@
              u'Description: Foo app is great',
              u' Well ...',
              u' it does nothing, though'],
-            pub_binary.getIndexStanza().splitlines())
+            build_bpph_stanza(pub_binary).makeOutput().splitlines())
 
     def testBinaryStanzaWithCustomFields(self):
         """Check just-created binary publication Index stanza with
@@ -189,7 +206,7 @@
              u' Well ...',
              u' it does nothing, though',
              u'Python-Version: >= 2.4'],
-            pub_binary.getIndexStanza().splitlines())
+            build_bpph_stanza(pub_binary).makeOutput().splitlines())
 
     def testBinaryStanzaDescription(self):
         """ Check the description field.
@@ -240,7 +257,7 @@
              u' .',
              u' %s' % ('x' * 100),
              ],
-            pub_binary.getIndexStanza().splitlines())
+            build_bpph_stanza(pub_binary).makeOutput().splitlines())
 
     def testBinaryStanzaWithNonAscii(self):
         """Check how will be a stanza with non-ascii content
@@ -272,7 +289,7 @@
              u'Description: Foo app is great',
              u' Using non-ascii as: \xe7\xe3\xe9\xf3',
              ],
-            pub_binary.getIndexStanza().splitlines())
+            build_bpph_stanza(pub_binary).makeOutput().splitlines())
 
     def testBinaryOmitsIdenticalSourceName(self):
         # Binaries omit the Source field if it identical to Package.
@@ -281,7 +298,7 @@
             binaryname='foo', pub_source=pub_source)[0]
         self.assertIs(
             None,
-            get_field(pub_binary.buildIndexStanzaFields(), 'Source'))
+            get_field(build_bpph_stanza(pub_binary), 'Source'))
 
     def testBinaryIncludesDifferingSourceName(self):
         # Binaries include a Source field if their name differs.
@@ -290,7 +307,7 @@
             binaryname='foo-bin', pub_source=pub_source)[0]
         self.assertEqual(
             u'foo',
-            get_field(pub_binary.buildIndexStanzaFields(), 'Source'))
+            get_field(build_bpph_stanza(pub_binary), 'Source'))
 
     def testBinaryIncludesDifferingSourceVersion(self):
         # Binaries also include a Source field if their versions differ.
@@ -299,7 +316,7 @@
             binaryname='foo', version='999', pub_source=pub_source)[0]
         self.assertEqual(
             u'foo (666)',
-            get_field(pub_binary.buildIndexStanzaFields(), 'Source'))
+            get_field(build_bpph_stanza(pub_binary), 'Source'))
 
 
 class TestNativeArchiveIndexesReparsing(TestNativePublishingBase):
@@ -316,7 +333,7 @@
         """Helper method to return the apt_pkg parser for the stanza."""
         index_filename = tempfile.mktemp()
         index_file = open(index_filename, 'w')
-        index_file.write(stanza.encode('utf-8'))
+        index_file.write(stanza.makeOutput().encode('utf-8'))
         index_file.close()
 
         parser = apt_pkg.TagFile(open(index_filename))
@@ -328,27 +345,27 @@
 
         return section
 
-    def test_getIndexStanza_binary_stanza(self):
+    def test_binary_stanza(self):
         """Check a binary stanza with APT parser."""
         pub_binary = self.getPubBinaries()[0]
 
-        section = self.write_stanza_and_reparse(pub_binary.getIndexStanza())
+        section = self.write_stanza_and_reparse(build_bpph_stanza(pub_binary))
 
         self.assertEqual(section.get('Package'), 'foo-bin')
         self.assertEqual(
             section.get('Description').splitlines(),
             ['Foo app is great', ' Well ...', ' it does nothing, though'])
 
-    def test_getIndexStanza_source_stanza(self):
+    def test_source_stanza(self):
         """Check a source stanza with APT parser."""
         pub_source = self.getPubSource()
 
-        section = self.write_stanza_and_reparse(pub_source.getIndexStanza())
+        section = self.write_stanza_and_reparse(build_spph_stanza(pub_source))
 
         self.assertEqual(section.get('Package'), 'foo')
         self.assertEqual(section.get('Maintainer'), 'Foo Bar <foo@xxxxxxx>')
 
-    def test_getIndexStanza_with_corrupt_dsc_binaries(self):
+    def test_source_with_corrupt_dsc_binaries(self):
         """Ensure corrupt binary fields are written correctly to indexes.
 
         This is a regression test for bug 436182.
@@ -378,7 +395,7 @@
         pub_source.sourcepackagerelease.dsc_binaries = (
             'foo_bin,\nbar_bin,\nzed_bin')
 
-        section = self.write_stanza_and_reparse(pub_source.getIndexStanza())
+        section = self.write_stanza_and_reparse(build_spph_stanza(pub_source))
 
         self.assertEqual('foo', section['Package'])
 
@@ -391,7 +408,7 @@
         # Without the fix, the second binary would not be parsed at all.
         self.assertEqual('foo_bin,\n bar_bin,\n zed_bin', section['Binary'])
 
-    def test_getIndexStanza_with_correct_dsc_binaries(self):
+    def test_source_with_correct_dsc_binaries(self):
         """Ensure correct binary fields are written correctly to indexes.
 
         During upload, our custom parser at:
@@ -412,7 +429,7 @@
         pub_source.sourcepackagerelease.dsc_binaries = (
             'foo_bin,\n bar_bin,\n zed_bin')
 
-        section = self.write_stanza_and_reparse(pub_source.getIndexStanza())
+        section = self.write_stanza_and_reparse(build_spph_stanza(pub_source))
 
         self.assertEqual('foo', section['Package'])
 

=== modified file 'lib/lp/registry/model/distroseries.py'
--- lib/lp/registry/model/distroseries.py	2014-08-19 03:46:03 +0000
+++ lib/lp/registry/model/distroseries.py	2014-10-31 13:23:01 +0000
@@ -1028,8 +1028,8 @@
                 PackagePublishingStatus.PUBLISHED)
 
         def eager_load(spphs):
-            # Preload everything which will be used by
-            # SourcePackagePublishingHistory.buildIndexStanzaFields.
+            # Preload everything which will be used by archivepublisher's
+            # build_source_stanza_fields.
             load_related(Section, spphs, ["sectionID"])
             sprs = load_related(
                 SourcePackageRelease, spphs, ["sourcepackagereleaseID"])
@@ -1064,8 +1064,8 @@
                 PackagePublishingStatus.PUBLISHED)
 
         def eager_load(bpphs):
-            # Preload everything which will be used by
-            # BinaryPackagePublishingHistory.buildIndexStanzaFields.
+            # Preload everything which will be used by archivepublisher's
+            # build_binary_stanza_fields.
             load_related(Section, bpphs, ["sectionID"])
             bprs = load_related(
                 BinaryPackageRelease, bpphs, ["binarypackagereleaseID"])

=== modified file 'lib/lp/registry/tests/test_distroseries.py'
--- lib/lp/registry/tests/test_distroseries.py	2014-07-31 00:23:58 +0000
+++ lib/lp/registry/tests/test_distroseries.py	2014-10-31 13:23:01 +0000
@@ -10,17 +10,19 @@
     ]
 
 from functools import partial
-from logging import getLogger
 
 from testtools.matchers import Equals
 import transaction
 from zope.component import getUtility
 from zope.security.proxy import removeSecurityProxy
 
+from lp.archivepublisher.indices import (
+    build_binary_stanza_fields,
+    build_source_stanza_fields,
+    )
 from lp.registry.errors import NoSuchDistroSeries
 from lp.registry.interfaces.distroseries import IDistroSeriesSet
 from lp.registry.interfaces.pocket import PackagePublishingPocket
-from lp.registry.interfaces.series import SeriesStatus
 from lp.services.database.interfaces import IStore
 from lp.soyuz.enums import (
     ArchivePurpose,
@@ -510,7 +512,8 @@
             for spp in self.series.getSourcePackagePublishing(
                     PackagePublishingPocket.RELEASE, self.universe_component,
                     self.series.main_archive):
-                spp.getIndexStanza()
+                build_source_stanza_fields(
+                    spp.sourcepackagerelease, spp.component, spp.section)
 
         recorder1, recorder2 = record_two_runs(
             get_index_stanzas,
@@ -528,7 +531,9 @@
             for bpp in self.series.getBinaryPackagePublishing(
                     das.architecturetag, PackagePublishingPocket.RELEASE,
                     self.universe_component, self.series.main_archive):
-                bpp.getIndexStanza()
+                build_binary_stanza_fields(
+                    bpp.binarypackagerelease, bpp.component, bpp.section,
+                    bpp.priority, bpp.phased_update_percentage, False)
 
         das = self.factory.makeDistroArchSeries(distroseries=self.series)
         recorder1, recorder2 = record_two_runs(

=== modified file 'lib/lp/soyuz/doc/publishing.txt'
--- lib/lp/soyuz/doc/publishing.txt	2014-07-08 06:34:37 +0000
+++ lib/lp/soyuz/doc/publishing.txt	2014-10-31 13:23:01 +0000
@@ -30,8 +30,6 @@
     >>> from lp.testing import verifyObject
     >>> from lp.registry.interfaces.distroseries import IDistroSeries
     >>> from lp.registry.interfaces.sourcepackage import ISourcePackage
-    >>> from lp.soyuz.interfaces.distributionsourcepackagerelease import (
-    ...     IDistributionSourcePackageRelease)
     >>> from lp.soyuz.interfaces.publishing import (
     ...     IBinaryPackagePublishingHistory,
     ...     ISourcePackagePublishingHistory,
@@ -302,59 +300,6 @@
     >>> print bpph.section_name
     base
 
-
-Stanza generation
-=================
-
-    >>> from lp.soyuz.interfaces.publishing import (
-    ...     IBinaryPackagePublishingHistory,
-    ...     ISourcePackageFilePublishing,
-    ...     )
-
-Retrieve any SourcePackagePublishingHistory entry.
-
-    >>> spph = SourcePackagePublishingHistory.get(10)
-
-    >>> print spph.displayname
-    alsa-utils 1.0.8-1ubuntu1 in warty
-
-A 'stanza' is the name given to a group of attributes related to one
-source or binary package in the archive index file, it provides
-information to APT, things like, package name, pool path, DSC format,
-files checksum, etc.
-
-The archive index should contain one entry for each source or binary
-currently published in the archive, it is usually partionated by
-component:
-
-<mirror_url_base>/ubuntu/edgy/main/binary-i386/Packages
-<mirror_url_base>/ubuntu/edgy/main/source/Sources
-
-The archive index is also available in in compressed formats.
-
-    >>> print spph.getIndexStanza() #doctest: -NORMALIZE_WHITESPACE
-    Package: alsa-utils
-    Binary: alsa-mixer
-    Version: 1.0.8-1ubuntu1
-    Section: base
-    Maintainer: Mark Shuttleworth <mark@xxxxxxxxxxx>
-    Architecture: all
-    Standards-Version: 3.6.2
-    Format: 1.0
-    Directory: pool/main/a/alsa-utils
-    Files:
-     01234567890123456789012345678925 3 alsa-utils_1.0.8-1ubuntu1.dsc
-    Checksums-Sha1:
-     a10856bfea3f0bdb09550dd41f3c5bc275da8a33 3 alsa-utils_1.0.8-1ubuntu1.dsc
-    Checksums-Sha256:
-     0123456789012345678901234567890123456789012345678901234567890123 3 alsa-utils_1.0.8-1ubuntu1.dsc
-
-
-Empty fields like are suppressed, like 'Build-Depends' or
-'Build-Depends-Indep' to avoid extra charge on download.
-See sourcepackagerelease.txt for further information about the fields
-in question.
-
 Files published are accessible via the files property:
 
     >>> any_pub_file = spph.files[-1]
@@ -924,32 +869,6 @@
     >>> IBinaryPackagePublishingHistory.providedBy(bpph)
     True
 
-Generating respective "Packages" stanzas:
-
-    >>> print bpph.getIndexStanza() #doctest: -NORMALIZE_WHITESPACE
-    Package: mozilla-firefox
-    Priority: important
-    Section: editors
-    Maintainer: Mark Shuttleworth <mark@xxxxxxxxxxxxx>
-    Architecture: i386
-    Version: 0.9
-    Recommends: gcc-3.4-base, libc6 (>= 2.3.2.ds1-4), gcc-3.4 (>= 3.4.1-4sarge1), gcc-3.4 (<< 3.4.2), libstdc++6-dev (>= 3.4.1-4sarge1)
-    Replaces: gnome-mozilla-browser
-    Suggests: firefox-gnome-support (= 1.0.7-0ubuntu20), latex-xft-fonts, xprint
-    Provides: mozilla-firefox
-    Depends: gcc-3.4-base, libc6 (>= 2.3.2.ds1-4), gcc-3.4 (>= 3.4.1-4sarge1), gcc-3.4 (<< 3.4.2), libstdc++6-dev (>= 3.4.1-4sarge1)
-    Conflicts: firefox, mozilla-web-browser
-    Pre-Depends: pmount, foo
-    Enhances: pmount, bar
-    Breaks: pmount, baz
-    Filename: pool/universe/m/mozilla-firefox/mozilla-firefox_0.9_i386.deb
-    Size: 3
-    MD5sum: 01234567890123456789012345678926
-    SHA1: 5a04c7b5ea3f0fdbc95d0dd47f3c5bc275da8a33
-    SHA256: 0123456789012345678901234567890123456789012345678901234567890123
-    Description: Mozilla Firefox Web Browser
-     Mozilla Firefox Web Browser is .....
-
     >>> any_file = bpph.files[-1]
     >>> IBinaryPackageFilePublishing.providedBy(any_file)
     True

=== modified file 'lib/lp/soyuz/interfaces/publishing.py'
--- lib/lp/soyuz/interfaces/publishing.py	2014-10-31 10:34:51 +0000
+++ lib/lp/soyuz/interfaces/publishing.py	2014-10-31 13:23:01 +0000
@@ -168,26 +168,6 @@
         If all the files get published correctly update its status properly.
         """
 
-    def getIndexStanza():
-        """Return archive index stanza contents
-
-        It's based on the locally provided buildIndexStanzaTemplate method,
-        which differs for binary and source instances.
-
-        :param separate_long_descriptions: if True, the long description will
-            be omitted from the stanza and Description-md5 will be included.
-        """
-
-    def buildIndexStanzaFields():
-        """Build a map of fields and values to be in the Index file.
-
-        The fields and values ae mapped into a dictionary, where the key is
-        the field name and value is the value string.
-
-        :param separate_long_descriptions: if True, the long description will
-            be omitted from the stanza and Description-md5 will be included.
-        """
-
     def requestObsolescence():
         """Make this publication obsolete.
 
@@ -848,50 +828,6 @@
             representing the binaries copied to the destination location.
         """
 
-    def getIndexStanza(separate_long_descriptions=False):
-        """Return archive index stanza contents
-
-        It's based on the locally provided buildIndexStanzaTemplate method,
-        which differs for binary and source instances.
-
-        :param separate_long_descriptions: if True, the long description will
-            be omitted from the stanza and Description-md5 will be included.
-        """
-
-    def buildIndexStanzaFields(separate_long_descriptions=False):
-        """Build a map of fields and values to be in the Index file.
-
-        The fields and values ae mapped into a dictionary, where the key is
-        the field name and value is the value string.
-
-        :param separate_long_descriptions: if True, the long description will
-            be omitted from the stanza and Description-md5 will be included.
-        """
-
-    def getTranslationsStanza(packages):
-        """Return archive Translation-en stanza contents
-
-        It's based on the locally provided buildTranslationsStanzaTemplate
-        method, which differs for binary and source instances.
-
-        :param packages: a set of (Package, Description-md5) tuples used to
-            determine if a package has already been added to the translation
-            file. The (Package, Description-md5) tuple will be added if it
-            doesn't already exist.
-        """
-
-    def buildTranslationsStanzaFields(packages):
-        """Build a map of fields and values to be in the Translation-en file.
-
-        The fields and values ae mapped into a dictionary, where the key is
-        the field name and value is the value string.
-
-        :param packages: a set of (Package, Description-md5) tuples used to
-            determine if a package has already been added to the translation
-            file. The (Package, Description-md5) tuple will be added if it
-            doesn't already exist.
-        """
-
     @export_read_operation()
     def getDownloadCount():
         """Get the download count of this binary package in this archive.

=== modified file 'lib/lp/soyuz/model/publishing.py'
--- lib/lp/soyuz/model/publishing.py	2014-10-31 10:34:51 +0000
+++ lib/lp/soyuz/model/publishing.py	2014-10-31 13:23:01 +0000
@@ -7,7 +7,6 @@
     'BinaryPackageFilePublishing',
     'BinaryPackagePublishingHistory',
     'get_current_source_releases',
-    'IndexStanzaFields',
     'makePoolPath',
     'PublishingSet',
     'SourcePackageFilePublishing',
@@ -17,10 +16,8 @@
 
 from collections import defaultdict
 from datetime import datetime
-import hashlib
 from operator import attrgetter
 import os
-import re
 import sys
 
 import pytz
@@ -310,11 +307,6 @@
         else:
             self.setPublished()
 
-    def getIndexStanza(self, separate_long_descriptions=False):
-        """See `IPublishing`."""
-        fields = self.buildIndexStanzaFields(separate_long_descriptions)
-        return fields.makeOutput()
-
     def setSuperseded(self):
         """Set to SUPERSEDED status."""
         self.status = PackagePublishingStatus.SUPERSEDED
@@ -353,67 +345,6 @@
         return self.section.name
 
 
-class IndexStanzaFields:
-    """Store and format ordered Index Stanza fields."""
-
-    def __init__(self):
-        self._names_lower = set()
-        self.fields = []
-
-    def append(self, name, value):
-        """Append an (field, value) tuple to the internal list.
-
-        Then we can use the FIFO-like behaviour in makeOutput().
-        """
-        if name.lower() in self._names_lower:
-            return
-        self._names_lower.add(name.lower())
-        self.fields.append((name, value))
-
-    def extend(self, entries):
-        """Extend the internal list with the key-value pairs in entries.
-        """
-        for name, value in entries:
-            self.append(name, value)
-
-    def makeOutput(self):
-        """Return a line-by-line aggregation of appended fields.
-
-        Empty fields values will cause the exclusion of the field.
-        The output order will preserve the insertion order, FIFO.
-        """
-        output_lines = []
-        for name, value in self.fields:
-            if not value:
-                continue
-
-            # do not add separation space for the special file list fields.
-            if name not in ('Files', 'Checksums-Sha1', 'Checksums-Sha256'):
-                value = ' %s' % value
-
-            # XXX Michael Nelson 20090930 bug=436182. We have an issue
-            # in the upload parser that has
-            #   1. introduced '\n' at the end of multiple-line-spanning
-            #      fields, such as dsc_binaries, but potentially others,
-            #   2. stripped the leading space from each subsequent line
-            #      of dsc_binaries values that span multiple lines.
-            # This is causing *incorrect* Source indexes to be created.
-            # This work-around can be removed once the fix for bug 436182
-            # is in place and the tainted data has been cleaned.
-            # First, remove any trailing \n or spaces.
-            value = value.rstrip()
-
-            # Second, as we have corrupt data where subsequent lines
-            # of values spanning multiple lines are not preceded by a
-            # space, we ensure that any \n in the value that is *not*
-            # followed by a white-space character has a space inserted.
-            value = re.sub(r"\n(\S)", r"\n \1", value)
-
-            output_lines.append('%s:%s' % (name, value))
-
-        return '\n'.join(output_lines)
-
-
 class SourcePackagePublishingHistory(SQLBase, ArchivePublisherBase):
     """A source package release publishing record."""
     implements(ISourcePackagePublishingHistory)
@@ -636,52 +567,6 @@
         name = release.sourcepackagename.name
         return "%s %s in %s" % (name, release.version, self.distroseries.name)
 
-    def _formatFileList(self, l):
-        return ''.join('\n %s %s %s' % ((h,) + f) for (h, f) in l)
-
-    def buildIndexStanzaFields(self):
-        """See `IPublishing`."""
-        # Special fields preparation.
-        spr = self.sourcepackagerelease
-        pool_path = makePoolPath(spr.name, self.component.name)
-        files_list = []
-        sha1_list = []
-        sha256_list = []
-        for spf in spr.files:
-            common = (
-                spf.libraryfile.content.filesize, spf.libraryfile.filename)
-            files_list.append((spf.libraryfile.content.md5, common))
-            sha1_list.append((spf.libraryfile.content.sha1, common))
-            sha256_list.append((spf.libraryfile.content.sha256, common))
-        # Filling stanza options.
-        fields = IndexStanzaFields()
-        fields.append('Package', spr.name)
-        fields.append('Binary', spr.dsc_binaries)
-        fields.append('Version', spr.version)
-        fields.append('Section', self.section.name)
-        fields.append('Maintainer', spr.dsc_maintainer_rfc822)
-        fields.append('Build-Depends', spr.builddepends)
-        fields.append('Build-Depends-Indep', spr.builddependsindep)
-        fields.append('Build-Conflicts', spr.build_conflicts)
-        fields.append('Build-Conflicts-Indep', spr.build_conflicts_indep)
-        fields.append('Architecture', spr.architecturehintlist)
-        fields.append('Standards-Version', spr.dsc_standards_version)
-        fields.append('Format', spr.dsc_format)
-        fields.append('Directory', pool_path)
-        fields.append('Files', self._formatFileList(files_list))
-        fields.append('Checksums-Sha1', self._formatFileList(sha1_list))
-        fields.append('Checksums-Sha256', self._formatFileList(sha256_list))
-        fields.append('Homepage', spr.homepage)
-        if spr.user_defined_fields:
-            fields.extend(spr.user_defined_fields)
-
-        return fields
-
-    def getIndexStanza(self):
-        """See `IPublishing`."""
-        fields = self.buildIndexStanzaFields()
-        return fields.makeOutput()
-
     def supersede(self, dominant=None, logger=None):
         """See `ISourcePackagePublishingHistory`."""
         assert self.status in active_publishing_status, (
@@ -936,136 +821,6 @@
         else:
             super(BinaryPackagePublishingHistory, self).publish(diskpool, log)
 
-    def _getFormattedDescription(self, summary, description):
-        # description field in index is an association of summary and
-        # description or the summary only if include_long_descriptions
-        # is false, as:
-        #
-        # Descrition: <SUMMARY>\n
-        #  <DESCRIPTION L1>
-        #  ...
-        #  <DESCRIPTION LN>
-        descr_lines = [line.lstrip() for line in description.splitlines()]
-        bin_description = '%s\n %s' % (summary, '\n '.join(descr_lines))
-        return bin_description
-
-    def buildIndexStanzaFields(self, separate_long_descriptions=False):
-        """See `IPublishing`."""
-        bpr = self.binarypackagerelease
-        spr = bpr.build.source_package_release
-
-        # binaries have only one file, the DEB
-        bin_file = bpr.files[0]
-        bin_filename = bin_file.libraryfile.filename
-        bin_size = bin_file.libraryfile.content.filesize
-        bin_md5 = bin_file.libraryfile.content.md5
-        bin_sha1 = bin_file.libraryfile.content.sha1
-        bin_sha256 = bin_file.libraryfile.content.sha256
-        bin_filepath = os.path.join(
-            makePoolPath(spr.name, self.component.name), bin_filename)
-        description = self._getFormattedDescription(
-            bpr.summary, bpr.description)
-        # Our formatted description isn't \n-terminated, but apt
-        # considers the trailing \n to be part of the data to hash.
-        bin_description_md5 = hashlib.md5(
-            description.encode('utf-8') + '\n').hexdigest()
-        if separate_long_descriptions:
-            # If distroseries.include_long_descriptions is False, the
-            # description should be the summary
-            bin_description = bpr.summary
-        else:
-            bin_description = description
-
-        # Dealing with architecturespecific field.
-        # Present 'all' in every archive index for architecture
-        # independent binaries.
-        if bpr.architecturespecific:
-            architecture = bpr.build.distro_arch_series.architecturetag
-        else:
-            architecture = 'all'
-
-        essential = None
-        if bpr.essential:
-            essential = 'yes'
-
-        source = None
-        if bpr.version != spr.version:
-            source = '%s (%s)' % (spr.name, spr.version)
-        elif bpr.name != spr.name:
-            source = spr.name
-
-        fields = IndexStanzaFields()
-        fields.append('Package', bpr.name)
-        fields.append('Source', source)
-        fields.append('Priority', self.priority.title.lower())
-        fields.append('Section', self.section.name)
-        fields.append('Installed-Size', bpr.installedsize)
-        fields.append('Maintainer', spr.dsc_maintainer_rfc822)
-        fields.append('Architecture', architecture)
-        fields.append('Version', bpr.version)
-        fields.append('Recommends', bpr.recommends)
-        fields.append('Replaces', bpr.replaces)
-        fields.append('Suggests', bpr.suggests)
-        fields.append('Provides', bpr.provides)
-        fields.append('Depends', bpr.depends)
-        fields.append('Conflicts', bpr.conflicts)
-        fields.append('Pre-Depends', bpr.pre_depends)
-        fields.append('Enhances', bpr.enhances)
-        fields.append('Breaks', bpr.breaks)
-        fields.append('Essential', essential)
-        fields.append('Filename', bin_filepath)
-        fields.append('Size', bin_size)
-        fields.append('MD5sum', bin_md5)
-        fields.append('SHA1', bin_sha1)
-        fields.append('SHA256', bin_sha256)
-        fields.append(
-            'Phased-Update-Percentage', self.phased_update_percentage)
-        fields.append('Description', bin_description)
-        if separate_long_descriptions:
-            fields.append('Description-md5', bin_description_md5)
-        if bpr.user_defined_fields:
-            fields.extend(bpr.user_defined_fields)
-
-        # XXX cprov 2006-11-03: the extra override fields (Bugs, Origin and
-        # Task) included in the template be were not populated.
-        # When we have the information this will be the place to fill them.
-
-        return fields
-
-    def getIndexStanza(self, separate_long_descriptions=False):
-        """See `IPublishing`."""
-        fields = self.buildIndexStanzaFields(separate_long_descriptions)
-        return fields.makeOutput()
-
-    def buildTranslationsStanzaFields(self, packages):
-        """See `IPublishing`."""
-        bpr = self.binarypackagerelease
-
-        bin_description = self._getFormattedDescription(
-            bpr.summary, bpr.description)
-        # Our formatted description isn't \n-terminated, but apt
-        # considers the trailing \n to be part of the data to hash.
-        bin_description_md5 = hashlib.md5(
-            bin_description.encode('utf-8') + '\n').hexdigest()
-        if (bpr.name, bin_description_md5) not in packages:
-            fields = IndexStanzaFields()
-            fields.append('Package', bpr.name)
-            fields.append('Description-md5', bin_description_md5)
-            fields.append('Description-en', bin_description)
-            packages.add((bpr.name, bin_description_md5))
-
-            return fields
-        else:
-            return None
-
-    def getTranslationsStanza(self, packages):
-        """See `IPublishing`."""
-        fields = self.buildTranslationsStanzaFields(packages)
-        if fields is None:
-            return None
-        else:
-            return fields.makeOutput()
-
     def _getOtherPublications(self):
         """Return remaining publications with the same overrides.

Follow ups

[Merge] lp:~wgrant/launchpad/indices-to-archivepublisher into lp:launchpad
From: noreply, 2014-10-31
Re: [Merge] lp:~wgrant/launchpad/indices-to-archivepublisher into lp:launchpad
From: William Grant, 2014-10-31