launchpad-reviewers team mailing list archive
  
  - 
     launchpad-reviewers team launchpad-reviewers team
- 
    Mailing list archive
  
- 
    Message #24724
  
 [Merge] ~twom/launchpad:oci-file-upload-deduplication into launchpad:master
  
Tom Wardill has proposed merging ~twom/launchpad:oci-file-upload-deduplication into launchpad:master.
Commit message:
Reuse existing image layers in new uploads
Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~twom/launchpad/+git/launchpad/+merge/383859
Rather than uploading each layer for each image to the librarian, reuse any that are existing.
This could introduce a race condition with garbage collection, but as we don't currently have any, it's safe for now, but should be considered in the design for garbo.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~twom/launchpad:oci-file-upload-deduplication into launchpad:master.
diff --git a/lib/lp/archiveuploader/ocirecipeupload.py b/lib/lp/archiveuploader/ocirecipeupload.py
index c3db6cb..33ce648 100644
--- a/lib/lp/archiveuploader/ocirecipeupload.py
+++ b/lib/lp/archiveuploader/ocirecipeupload.py
@@ -17,6 +17,7 @@ from zope.component import getUtility
 
 from lp.archiveuploader.utils import UploadError
 from lp.buildmaster.enums import BuildStatus
+from lp.oci.interfaces.ocirecipebuild import IOCIFileSet
 from lp.services.helpers import filenameToContentType
 from lp.services.librarian.interfaces import ILibraryFileAliasSet
 
@@ -62,6 +63,15 @@ class OCIRecipeUpload:
                         "{}.tar.gz".format(layer_id)
                     )
                     self.logger.debug("Layer path: {}".format(layer_path))
+                    # If the file is already in the librarian, the file
+                    # won't exist on disk, so we can just reuse it.
+                    existing_file = getUtility(IOCIFileSet).getByLayerDigest(
+                        digest)
+                    if existing_file:
+                        build.addFile(
+                            existing_file.library_file,
+                            layer_file_digest=digest)
+                        continue
                     if not os.path.exists(layer_path):
                         raise UploadError(
                             "Missing layer file: {}.".format(layer_id))
diff --git a/lib/lp/archiveuploader/tests/test_ocirecipeupload.py b/lib/lp/archiveuploader/tests/test_ocirecipeupload.py
index bae4f4a..75e2455 100644
--- a/lib/lp/archiveuploader/tests/test_ocirecipeupload.py
+++ b/lib/lp/archiveuploader/tests/test_ocirecipeupload.py
@@ -113,3 +113,31 @@ class TestOCIRecipeUploads(TestUploadProcessorBase):
             "ERROR Missing layer file: layer_2.",
             self.log.getLogBuffer())
         self.assertFalse(self.build.verifySuccessfulUpload())
+
+    def test_reuse_existing_file(self):
+        # The digests.json specifies a file that already exists in the
+        # librarian, but not on disk
+        self.assertFalse(self.build.verifySuccessfulUpload())
+        del get_property_cache(self.build).manifest
+        del get_property_cache(self.build).digests
+        upload_dir = os.path.join(
+            self.incoming_folder, "test", str(self.build.id), "ubuntu")
+        write_file(os.path.join(upload_dir, "layer_1.tar.gz"), b"layer_1")
+        write_file(os.path.join(upload_dir, "manifest.json"), b"manifest")
+        write_file(
+            os.path.join(upload_dir, "digests.json"), json.dumps(self.digests))
+
+        # create the existing file
+        self.switchToAdmin()
+        layer_2 = self.factory.makeOCIFile(layer_file_digest="digest_2")
+        Store.of(layer_2.build).flush()
+        self.switchToUploader()
+
+        handler = UploadHandler.forProcessor(
+            self.uploadprocessor, self.incoming_folder, "test", self.build)
+        result = handler.processOCIRecipe(self.log)
+        self.assertEqual(
+            UploadStatusEnum.ACCEPTED, result,
+            "OCI upload failed\nGot: %s" % self.log.getLogBuffer())
+        self.assertEqual(BuildStatus.FULLYBUILT, self.build.status)
+        self.assertTrue(self.build.verifySuccessfulUpload())
diff --git a/lib/lp/oci/configure.zcml b/lib/lp/oci/configure.zcml
index 616fd2a..a7cce5e 100644
--- a/lib/lp/oci/configure.zcml
+++ b/lib/lp/oci/configure.zcml
@@ -97,6 +97,20 @@
         factory="lp.oci.model.ocirecipebuildbehaviour.OCIRecipeBuildBehaviour"
         permission="zope.Public" />
 
+    <!-- OCIFile -->
+    <class class="lp.oci.model.ocirecipebuild.OCIFile">
+        <require
+            permission="launchpad.View"
+            interface="lp.oci.interfaces.ocirecipebuild.IOCIFile" />
+    </class>
+
+    <!-- OCIFileSet -->
+    <securedutility
+        class="lp.oci.model.ocirecipebuild.OCIFileSet"
+        provides="lp.oci.interfaces.ocirecipebuild.IOCIFileSet">
+        <allow interface="lp.oci.interfaces.ocirecipebuild.IOCIFileSet" />
+    </securedutility>
+
     <!-- OCIRegistryCredentials -->
     <class class="lp.oci.model.ociregistrycredentials.OCIRegistryCredentials">
         <require
diff --git a/lib/lp/oci/interfaces/ocirecipebuild.py b/lib/lp/oci/interfaces/ocirecipebuild.py
index 1f00269..95539c0 100644
--- a/lib/lp/oci/interfaces/ocirecipebuild.py
+++ b/lib/lp/oci/interfaces/ocirecipebuild.py
@@ -8,6 +8,7 @@ from __future__ import absolute_import, print_function, unicode_literals
 __metaclass__ = type
 __all__ = [
     'IOCIFile',
+    'IOCIFileSet',
     'IOCIRecipeBuild',
     'IOCIRecipeBuildSet',
     'OCIRecipeBuildRegistryUploadStatus',
@@ -82,6 +83,13 @@ class OCIRecipeBuildRegistryUploadStatus(EnumeratedType):
         """)
 
 
+class IOCIFileSet(Interface):
+    """A file artifact of an OCIRecipeBuild."""
+
+    def getByLayerDigest(layer_file_digest):
+        """Return an `IOCIFile` with the matching layer_file_digest."""
+
+
 class IOCIRecipeBuildView(IPackageBuild):
     """`IOCIRecipeBuild` attributes that require launchpad.View permission."""
 
diff --git a/lib/lp/oci/model/ocirecipebuild.py b/lib/lp/oci/model/ocirecipebuild.py
index 6b8e023..94b7326 100644
--- a/lib/lp/oci/model/ocirecipebuild.py
+++ b/lib/lp/oci/model/ocirecipebuild.py
@@ -43,6 +43,7 @@ from lp.buildmaster.model.packagebuild import PackageBuildMixin
 from lp.oci.interfaces.ocirecipe import IOCIRecipeSet
 from lp.oci.interfaces.ocirecipebuild import (
     IOCIFile,
+    IOCIFileSet,
     IOCIRecipeBuild,
     IOCIRecipeBuildSet,
     OCIRecipeBuildRegistryUploadStatus,
@@ -98,6 +99,16 @@ class OCIFile(Storm):
         self.layer_file_digest = layer_file_digest
 
 
+@implementer(IOCIFileSet)
+class OCIFileSet:
+
+    def getByLayerDigest(self, layer_file_digest):
+        return IStore(OCIFile).find(
+            OCIFile,
+            OCIFile.layer_file_digest == layer_file_digest).order_by(
+                OCIFile.id).first()
+
+
 @implementer(IOCIRecipeBuild)
 class OCIRecipeBuild(PackageBuildMixin, Storm):
 
diff --git a/lib/lp/oci/model/ocirecipebuildbehaviour.py b/lib/lp/oci/model/ocirecipebuildbehaviour.py
index dbef696..6205cfd 100644
--- a/lib/lp/oci/model/ocirecipebuildbehaviour.py
+++ b/lib/lp/oci/model/ocirecipebuildbehaviour.py
@@ -18,6 +18,7 @@ import json
 import os
 
 from twisted.internet import defer
+from zope.component import getUtility
 from zope.interface import implementer
 
 from lp.app.errors import NotFoundError
@@ -32,6 +33,7 @@ from lp.buildmaster.interfaces.buildfarmjobbehaviour import (
 from lp.buildmaster.model.buildfarmjobbehaviour import (
     BuildFarmJobBehaviourBase,
     )
+from lp.oci.interfaces.ocirecipebuild import IOCIFileSet
 from lp.registry.interfaces.series import SeriesStatus
 from lp.services.librarian.utils import copy_and_close
 from lp.snappy.model.snapbuildbehaviour import SnapProxyMixin
@@ -134,12 +136,17 @@ class OCIRecipeBuildBehaviour(SnapProxyMixin, BuildFarmJobBehaviourBase):
                 # This is in the form '<id>/layer.tar', we only need the first
                 layer_filename = "{}.tar.gz".format(layer_id.split('/')[0])
                 digest = digests_section[diff_id]['digest']
-                try:
-                    _, librarian_file, _ = self.build.getLayerFileByDigest(
-                        digest)
-                except NotFoundError:
+                # Check if the file already exists in the librarian
+                oci_file = getUtility(IOCIFileSet).getByLayerDigest(
+                    digest)
+                if oci_file:
+                    librarian_file = oci_file.library_file
+                # If it doesn't, we need to download it
+                else:
                     files.add(layer_filename)
                     continue
+                # If the file already exists, retrieve it from the librarian
+                # so we can add it to the build artifacts
                 layer_path = os.path.join(upload_path, layer_filename)
                 librarian_file.open()
                 copy_and_close(librarian_file, open(layer_path, 'wb'))
diff --git a/lib/lp/oci/tests/test_ocirecipebuild.py b/lib/lp/oci/tests/test_ocirecipebuild.py
index 84a69f4..2ebffde 100644
--- a/lib/lp/oci/tests/test_ocirecipebuild.py
+++ b/lib/lp/oci/tests/test_ocirecipebuild.py
@@ -29,6 +29,7 @@ from lp.oci.interfaces.ocirecipe import (
     OCI_RECIPE_WEBHOOKS_FEATURE_FLAG,
     )
 from lp.oci.interfaces.ocirecipebuild import (
+    IOCIFileSet,
     IOCIRecipeBuild,
     IOCIRecipeBuildSet,
     )
@@ -52,6 +53,32 @@ from lp.testing.layers import (
 from lp.testing.matchers import HasQueryCount
 
 
+class TestOCIFileSet(TestCaseWithFactory):
+
+    layer = LaunchpadZopelessLayer
+
+    def setUp(self):
+        super(TestOCIFileSet, self).setUp()
+        self.useFixture(FeatureFixture({OCI_RECIPE_ALLOW_CREATE: 'on'}))
+
+    def test_implements_interface(self):
+        file_set = getUtility(IOCIFileSet)
+        self.assertProvides(file_set, IOCIFileSet)
+
+    def test_getByLayerDigest(self):
+        digest = "test_digest"
+        oci_file = self.factory.makeOCIFile(layer_file_digest=digest)
+        for _ in range(3):
+            self.factory.makeOCIFile()
+
+        result = getUtility(IOCIFileSet).getByLayerDigest(digest)
+        self.assertEqual(oci_file, result)
+
+    def test_getByLayerDigest_not_matching(self):
+        result = getUtility(IOCIFileSet).getByLayerDigest("not existing")
+        self.assertIsNone(result)
+
+
 class TestOCIRecipeBuild(TestCaseWithFactory):
 
     layer = LaunchpadZopelessLayer
diff --git a/lib/lp/oci/tests/test_ocirecipebuildbehaviour.py b/lib/lp/oci/tests/test_ocirecipebuildbehaviour.py
index 8536072..ee18740 100644
--- a/lib/lp/oci/tests/test_ocirecipebuildbehaviour.py
+++ b/lib/lp/oci/tests/test_ocirecipebuildbehaviour.py
@@ -519,6 +519,41 @@ class TestHandleStatusForOCIRecipeBuild(MakeOCIBuildMixin,
             self.assertEqual(contents, b'retrieved from librarian')
 
     @defer.inlineCallbacks
+    def test_handleStatus_OK_reuse_from_other_build(self):
+        """We should be able to reuse a layer file from a separate build."""
+        self.factory.makeOCIFile(
+            layer_file_digest=u'digest_2',
+            content="layer 2 retrieved from librarian")
+
+        now = datetime.now()
+        mock_datetime = self.useFixture(MockPatch(
+            'lp.buildmaster.model.buildfarmjobbehaviour.datetime')).mock
+        mock_datetime.now = lambda: now
+        with dbuser(config.builddmaster.dbuser):
+            yield self.behaviour.handleStatus(
+                self.build.buildqueue_record, 'OK',
+                {'filemap': self.filemap})
+        self.assertEqual(
+            ['buildlog', 'manifest_hash', 'digests_hash', 'config_1_hash'],
+            self.slave._got_file_record)
+        # This hash should not appear as it is already in the librarian
+        self.assertNotIn('layer_1_hash', self.slave._got_file_record)
+        self.assertNotIn('layer_2_hash', self.slave._got_file_record)
+
+        # layer_2 should have been retrieved from the librarian
+        layer_2_path = os.path.join(
+            self.upload_root,
+            "incoming",
+            self.behaviour.getUploadDirLeaf(self.build.build_cookie),
+            str(self.build.archive.id),
+            self.build.distribution.name,
+            "layer_2.tar.gz"
+        )
+        with open(layer_2_path, 'rb') as layer_2_fp:
+            contents = layer_2_fp.read()
+            self.assertEqual(contents, b'layer 2 retrieved from librarian')
+
+    @defer.inlineCallbacks
     def test_handleStatus_OK_absolute_filepath(self):
 
         self._createTestFile(
diff --git a/lib/lp/security.py b/lib/lp/security.py
index 0eec9f3..cbef34d 100644
--- a/lib/lp/security.py
+++ b/lib/lp/security.py
@@ -117,7 +117,10 @@ from lp.oci.interfaces.ocirecipe import (
     IOCIRecipe,
     IOCIRecipeBuildRequest,
     )
-from lp.oci.interfaces.ocirecipebuild import IOCIRecipeBuild
+from lp.oci.interfaces.ocirecipebuild import (
+    IOCIFile,
+    IOCIRecipeBuild,
+    )
 from lp.oci.interfaces.ociregistrycredentials import IOCIRegistryCredentials
 from lp.registry.enums import PersonVisibility
 from lp.registry.interfaces.announcement import IAnnouncement
@@ -3547,6 +3550,11 @@ class AdminOCIRecipeBuild(AdminByBuilddAdmin):
     usedfor = IOCIRecipeBuild
 
 
+class ViewOCIFile(AnonymousAuthorization):
+    """Anyone can view an `IOCIFile`."""
+    usedfor = IOCIFile
+
+
 class ViewOCIRegistryCredentials(AuthorizationBase):
     permission = 'launchpad.View'
     usedfor = IOCIRegistryCredentials