← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~cjwatson/launchpad/codeimport-worker-refactor into lp:launchpad

 

Colin Watson has proposed merging lp:~cjwatson/launchpad/codeimport-worker-refactor into lp:launchpad with lp:~cjwatson/launchpad/codeimport-source-details-refactor as a prerequisite.

Commit message:
Refactor various bits of the code import worker to be less Bazaar-specific.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #1469459 in Launchpad itself: "import external code into a LP git repo (natively)"
  https://bugs.launchpad.net/launchpad/+bug/1469459

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/codeimport-worker-refactor/+merge/308122
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/codeimport-worker-refactor into lp:launchpad.
=== modified file 'lib/lp/code/xmlrpc/codeimportscheduler.py'
--- lib/lp/code/xmlrpc/codeimportscheduler.py	2016-10-11 13:46:57 +0000
+++ lib/lp/code/xmlrpc/codeimportscheduler.py	2016-10-11 13:46:57 +0000
@@ -69,10 +69,10 @@
         job = self._getJob(job_id)
         arguments = CodeImportSourceDetails.fromCodeImportJob(
             job).asArguments()
-        branch = job.code_import.branch
-        branch_url = canonical_url(branch)
-        log_file_name = '%s.log' % branch.unique_name[1:].replace('/', '-')
-        return (arguments, branch_url, log_file_name)
+        target = job.code_import.target
+        target_url = canonical_url(target)
+        log_file_name = '%s.log' % target.unique_name[1:].replace('/', '-')
+        return (arguments, target_url, log_file_name)
 
     @return_fault
     def _updateHeartbeat(self, job_id, log_tail):

=== modified file 'lib/lp/code/xmlrpc/tests/test_codeimportscheduler.py'
--- lib/lp/code/xmlrpc/tests/test_codeimportscheduler.py	2016-10-11 13:46:57 +0000
+++ lib/lp/code/xmlrpc/tests/test_codeimportscheduler.py	2016-10-11 13:46:57 +0000
@@ -57,20 +57,20 @@
         self.assertEqual(code_import_job.id, job_id)
 
     def test_getImportDataForJobID(self):
-        # getImportDataForJobID returns the worker arguments, branch url and
+        # getImportDataForJobID returns the worker arguments, target url and
         # log file name for an import corresponding to a particular job.
         code_import_job = self.makeCodeImportJob(running=True)
         code_import = removeSecurityProxy(code_import_job).code_import
-        code_import_arguments, branch_url, log_file_name = \
+        code_import_arguments, target_url, log_file_name = \
             self.api.getImportDataForJobID(code_import_job.id)
         import_as_arguments = CodeImportSourceDetails.fromCodeImportJob(
             code_import_job).asArguments()
         expected_log_file_name = '%s.log' % (
-            code_import.branch.unique_name[1:].replace('/', '-'))
+            code_import.target.unique_name[1:].replace('/', '-'))
         self.assertEqual(
-            (import_as_arguments, canonical_url(code_import.branch),
+            (import_as_arguments, canonical_url(code_import.target),
              expected_log_file_name),
-            (code_import_arguments, branch_url, log_file_name))
+            (code_import_arguments, target_url, log_file_name))
 
     def test_getImportDataForJobID_not_found(self):
         # getImportDataForJobID returns a NoSuchCodeImportJob fault when there

=== modified file 'lib/lp/codehosting/codeimport/tests/test_worker.py'
--- lib/lp/codehosting/codeimport/tests/test_worker.py	2016-10-11 13:46:57 +0000
+++ lib/lp/codehosting/codeimport/tests/test_worker.py	2016-10-11 13:46:57 +0000
@@ -78,7 +78,7 @@
     get_default_bazaar_branch_store,
     GitImportWorker,
     ImportDataStore,
-    ImportWorker,
+    ToBzrImportWorker,
     )
 from lp.codehosting.safe_open import (
     AcceptAnythingPolicy,
@@ -428,7 +428,7 @@
         source_details = self.factory.makeCodeImportSourceDetails()
         # That the remote name is like this is part of the interface of
         # ImportDataStore.
-        remote_name = '%08x.tar.gz' % (source_details.branch_id,)
+        remote_name = '%08x.tar.gz' % (source_details.target_id,)
         local_name = '%s.tar.gz' % (self.factory.getUniqueString(),)
         transport = self.get_transport()
         transport.put_bytes(remote_name, '')
@@ -442,7 +442,7 @@
         source_details = self.factory.makeCodeImportSourceDetails()
         # That the remote name is like this is part of the interface of
         # ImportDataStore.
-        remote_name = '%08x.tar.gz' % (source_details.branch_id,)
+        remote_name = '%08x.tar.gz' % (source_details.target_id,)
         content = self.factory.getUniqueString()
         transport = self.get_transport()
         transport.put_bytes(remote_name, content)
@@ -457,7 +457,7 @@
         source_details = self.factory.makeCodeImportSourceDetails()
         # That the remote name is like this is part of the interface of
         # ImportDataStore.
-        remote_name = '%08x.tar.gz' % (source_details.branch_id,)
+        remote_name = '%08x.tar.gz' % (source_details.target_id,)
         content = self.factory.getUniqueString()
         transport = self.get_transport()
         transport.put_bytes(remote_name, content)
@@ -481,7 +481,7 @@
         store.put(local_name)
         # That the remote name is like this is part of the interface of
         # ImportDataStore.
-        remote_name = '%08x.tar.gz' % (source_details.branch_id,)
+        remote_name = '%08x.tar.gz' % (source_details.target_id,)
         self.assertEquals(content, transport.get_bytes(remote_name))
 
     def test_put_ensures_base(self):
@@ -509,7 +509,7 @@
         store.put(local_name, self.get_transport(local_prefix))
         # That the remote name is like this is part of the interface of
         # ImportDataStore.
-        remote_name = '%08x.tar.gz' % (source_details.branch_id,)
+        remote_name = '%08x.tar.gz' % (source_details.target_id,)
         self.assertEquals(content, transport.get_bytes(remote_name))
 
 
@@ -585,8 +585,8 @@
         transport = store.import_data_store._transport
         source_details = store.import_data_store.source_details
         self.assertTrue(
-            transport.has('%08x.tar.gz' % source_details.branch_id),
-            "Couldn't find '%08x.tar.gz'" % source_details.branch_id)
+            transport.has('%08x.tar.gz' % source_details.target_id),
+            "Couldn't find '%08x.tar.gz'" % source_details.target_id)
 
     def test_fetchFromArchiveFailure(self):
         # If a tree has not been archived yet, but we try to retrieve it from
@@ -631,7 +631,7 @@
 
     def makeImportWorker(self):
         """Make an ImportWorker."""
-        return ImportWorker(
+        return ToBzrImportWorker(
             self.source_details, self.get_transport('import_data'),
             self.makeBazaarBranchStore(), logging.getLogger("silent"),
             AcceptAnythingPolicy())
@@ -728,7 +728,7 @@
         import_worker.import_data_store.put('git.db')
         # Make sure there's a Bazaar branch in the branch store.
         branch = self.make_branch('branch')
-        ImportWorker.pushBazaarBranch(import_worker, branch)
+        ToBzrImportWorker.pushBazaarBranch(import_worker, branch)
         # Finally, fetching the tree gets the git.db file too.
         branch = import_worker.getBazaarBranch()
         self.assertEqual(
@@ -747,7 +747,7 @@
         import_worker.import_data_store.put('git-cache.tar.gz')
         # Make sure there's a Bazaar branch in the branch store.
         branch = self.make_branch('branch')
-        ImportWorker.pushBazaarBranch(import_worker, branch)
+        ToBzrImportWorker.pushBazaarBranch(import_worker, branch)
         # Finally, fetching the tree gets the git.db file too.
         new_branch = import_worker.getBazaarBranch()
         self.assertEqual(
@@ -767,13 +767,13 @@
     :source_details: A `CodeImportSourceDetails` describing the import.
     """
     tree_transport = get_transport(config.codeimport.foreign_tree_store)
-    prefix = '%08x' % source_details.branch_id
+    prefix = '%08x' % source_details.target_id
     if tree_transport.has('.'):
         for filename in tree_transport.list_dir('.'):
             if filename.startswith(prefix):
                 tree_transport.delete(filename)
     branchstore = get_default_bazaar_branch_store()
-    branch_name = '%08x' % source_details.branch_id
+    branch_name = '%08x' % source_details.target_id
     if branchstore.transport.has(branch_name):
         branchstore.transport.delete_tree(branch_name)
 
@@ -822,7 +822,7 @@
         """Get the Bazaar branch 'worker' stored into its BazaarBranchStore.
         """
         branch_url = worker.bazaar_branch_store._getMirrorURL(
-            worker.source_details.branch_id)
+            worker.source_details.target_id)
         return Branch.open(branch_url)
 
     def test_import(self):
@@ -885,7 +885,7 @@
         self.addCleanup(lambda: shutil.rmtree(tree_path))
 
         branch_url = get_default_bazaar_branch_store()._getMirrorURL(
-            source_details.branch_id)
+            source_details.target_id)
         branch = Branch.open(branch_url)
 
         self.assertEqual(self.foreign_commit_count, branch.revno())
@@ -1345,7 +1345,7 @@
         self.assertEqual(
             CodeImportWorkerExitCode.SUCCESS, worker.run())
         branch_url = self.bazaar_store._getMirrorURL(
-            worker.source_details.branch_id)
+            worker.source_details.target_id)
         branch = Branch.open(branch_url)
         self.assertEquals(self.revid, branch.last_revision())
 

=== modified file 'lib/lp/codehosting/codeimport/tests/test_workermonitor.py'
--- lib/lp/codehosting/codeimport/tests/test_workermonitor.py	2016-10-11 13:46:57 +0000
+++ lib/lp/codehosting/codeimport/tests/test_workermonitor.py	2016-10-11 13:46:57 +0000
@@ -241,21 +241,21 @@
         return worker_monitor.getWorkerArguments().addCallback(
             self.assertEqual, args)
 
-    def test_getWorkerArguments_sets_branch_url_and_logfilename(self):
-        # getWorkerArguments sets the _branch_url (for use in oops reports)
+    def test_getWorkerArguments_sets_target_url_and_logfilename(self):
+        # getWorkerArguments sets the _target_url (for use in oops reports)
         # and _log_file_name (for upload to the librarian) attributes on the
         # WorkerMonitor from the data returned by getImportDataForJobID.
-        branch_url = self.factory.getUniqueString()
+        target_url = self.factory.getUniqueString()
         log_file_name = self.factory.getUniqueString()
         worker_monitor = self.makeWorkerMonitorWithJob(
-            1, (['a'], branch_url, log_file_name))
+            1, (['a'], target_url, log_file_name))
 
         def check_branch_log(ignored):
             # Looking at the _ attributes here is in slightly poor taste, but
             # much much easier than them by logging and parsing an oops, etc.
             self.assertEqual(
-                (branch_url, log_file_name),
-                (worker_monitor._branch_url, worker_monitor._log_file_name))
+                (target_url, log_file_name),
+                (worker_monitor._target_url, worker_monitor._log_file_name))
 
         return worker_monitor.getWorkerArguments().addCallback(
             check_branch_log)

=== modified file 'lib/lp/codehosting/codeimport/worker.py'
--- lib/lp/codehosting/codeimport/worker.py	2016-10-11 13:46:57 +0000
+++ lib/lp/codehosting/codeimport/worker.py	2016-10-11 13:46:57 +0000
@@ -15,6 +15,7 @@
     'ForeignTreeStore',
     'GitImportWorker',
     'ImportWorker',
+    'ToBzrImportWorker',
     'get_default_bazaar_branch_store',
     ]
 
@@ -268,18 +269,20 @@
     of the information suitable for passing around on executables' command
     lines.
 
-    :ivar branch_id: The id of the branch associated to this code import, used
-        for locating the existing import and the foreign tree.
+    :ivar target_id: The id of the Bazaar branch associated with this code
+        import, used for locating the existing import and the foreign tree.
     :ivar rcstype: 'cvs', 'git', 'bzr-svn', 'bzr' as appropriate.
     :ivar url: The branch URL if rcstype in ['bzr-svn', 'git', 'bzr'], None
         otherwise.
     :ivar cvs_root: The $CVSROOT if rcstype == 'cvs', None otherwise.
     :ivar cvs_module: The CVS module if rcstype == 'cvs', None otherwise.
+    :ivar stacked_on_url: The URL of the branch that the associated branch
+        is stacked on, if any.
     """
 
-    def __init__(self, branch_id, rcstype, url=None, cvs_root=None,
+    def __init__(self, target_id, rcstype, url=None, cvs_root=None,
                  cvs_module=None, stacked_on_url=None):
-        self.branch_id = branch_id
+        self.target_id = target_id
         self.rcstype = rcstype
         self.url = url
         self.cvs_root = cvs_root
@@ -289,7 +292,7 @@
     @classmethod
     def fromArguments(cls, arguments):
         """Convert command line-style arguments to an instance."""
-        branch_id = int(arguments.pop(0))
+        target_id = int(arguments.pop(0))
         rcstype = arguments.pop(0)
         if rcstype in ['bzr-svn', 'git', 'bzr']:
             url = arguments.pop(0)
@@ -305,34 +308,34 @@
         else:
             raise AssertionError("Unknown rcstype %r." % rcstype)
         return cls(
-            branch_id, rcstype, url, cvs_root, cvs_module, stacked_on_url)
+            target_id, rcstype, url, cvs_root, cvs_module, stacked_on_url)
 
     @classmethod
     def fromCodeImportJob(cls, job):
         """Convert a `CodeImportJob` to an instance."""
         code_import = job.code_import
-        branch = code_import.branch
-        if branch.stacked_on is not None and not branch.stacked_on.private:
-            stacked_path = branch_id_alias(branch.stacked_on)
+        target = code_import.target
+        if target.stacked_on is not None and not target.stacked_on.private:
+            stacked_path = branch_id_alias(target.stacked_on)
             stacked_on_url = compose_public_url('http', stacked_path)
         else:
             stacked_on_url = None
         if code_import.rcs_type == RevisionControlSystems.BZR_SVN:
             return cls(
-                branch.id, 'bzr-svn', str(code_import.url),
+                target.id, 'bzr-svn', str(code_import.url),
                 stacked_on_url=stacked_on_url)
         elif code_import.rcs_type == RevisionControlSystems.CVS:
             return cls(
-                branch.id, 'cvs',
+                target.id, 'cvs',
                 cvs_root=str(code_import.cvs_root),
                 cvs_module=str(code_import.cvs_module))
         elif code_import.rcs_type == RevisionControlSystems.GIT:
             return cls(
-                branch.id, 'git', str(code_import.url),
+                target.id, 'git', str(code_import.url),
                 stacked_on_url=stacked_on_url)
         elif code_import.rcs_type == RevisionControlSystems.BZR:
             return cls(
-                branch.id, 'bzr', str(code_import.url),
+                target.id, 'bzr', str(code_import.url),
                 stacked_on_url=stacked_on_url)
         else:
             raise AssertionError("Unknown rcstype %r." % code_import.rcs_type)
@@ -340,7 +343,7 @@
     def asArguments(self):
         """Return a list of arguments suitable for passing to a child process.
         """
-        result = [str(self.branch_id), self.rcstype]
+        result = [str(self.target_id), self.rcstype]
         if self.rcstype in ['bzr-svn', 'git', 'bzr']:
             result.append(self.url)
             if self.stacked_on_url is not None:
@@ -374,7 +377,7 @@
         """
         self.source_details = source_details
         self._transport = transport
-        self._branch_id = source_details.branch_id
+        self._target_id = source_details.target_id
 
     def _getRemoteName(self, local_name):
         """Convert `local_name` to the name used to store a file.
@@ -394,7 +397,7 @@
         if dot_index < 0:
             raise AssertionError("local_name must have an extension.")
         ext = local_name[dot_index:]
-        return '%08x%s' % (self._branch_id, ext)
+        return '%08x%s' % (self._target_id, ext)
 
     def fetch(self, filename, dest_transport=None):
         """Retrieve `filename` from the store.
@@ -505,57 +508,23 @@
 class ImportWorker:
     """Oversees the actual work of a code import."""
 
-    # Where the Bazaar working tree will be stored.
-    BZR_BRANCH_PATH = 'bzr_branch'
-
-    # Should `getBazaarBranch` create a working tree?
-    needs_bzr_tree = True
-
-    required_format = BzrDirFormat.get_default_format()
-
-    def __init__(self, source_details, import_data_transport,
-                 bazaar_branch_store, logger, opener_policy):
+    def __init__(self, source_details, logger, opener_policy):
         """Construct an `ImportWorker`.
 
         :param source_details: A `CodeImportSourceDetails` object.
-        :param bazaar_branch_store: A `BazaarBranchStore`. The import worker
-            uses this to fetch and store the Bazaar branches that are created
-            and updated during the import process.
         :param logger: A `Logger` to pass to cscvs.
         :param opener_policy: Policy object that decides what branches can
              be imported
         """
         self.source_details = source_details
-        self.bazaar_branch_store = bazaar_branch_store
-        self.import_data_store = ImportDataStore(
-            import_data_transport, self.source_details)
         self._logger = logger
         self._opener_policy = opener_policy
 
-    def getBazaarBranch(self):
-        """Return the Bazaar `Branch` that we are importing into."""
-        if os.path.isdir(self.BZR_BRANCH_PATH):
-            shutil.rmtree(self.BZR_BRANCH_PATH)
-        return self.bazaar_branch_store.pull(
-            self.source_details.branch_id, self.BZR_BRANCH_PATH,
-            self.required_format, self.needs_bzr_tree,
-            stacked_on_url=self.source_details.stacked_on_url)
-
-    def pushBazaarBranch(self, bazaar_branch):
-        """Push the updated Bazaar branch to the server.
-
-        :return: True if revisions were transferred.
-        """
-        return self.bazaar_branch_store.push(
-            self.source_details.branch_id, bazaar_branch,
-            self.required_format,
-            stacked_on_url=self.source_details.stacked_on_url)
-
     def getWorkingDirectory(self):
         """The directory we should change to and store all scratch files in.
         """
         base = config.codeimportworker.working_directory_root
-        dirname = 'worker-for-branch-%s' % self.source_details.branch_id
+        dirname = 'worker-for-branch-%s' % self.source_details.target_id
         return os.path.join(base, dirname)
 
     def run(self):
@@ -594,7 +563,56 @@
         raise NotImplementedError()
 
 
-class CSCVSImportWorker(ImportWorker):
+class ToBzrImportWorker(ImportWorker):
+    """Oversees the actual work of a code import to Bazaar."""
+
+    # Where the Bazaar working tree will be stored.
+    BZR_BRANCH_PATH = 'bzr_branch'
+
+    # Should `getBazaarBranch` create a working tree?
+    needs_bzr_tree = True
+
+    required_format = BzrDirFormat.get_default_format()
+
+    def __init__(self, source_details, import_data_transport,
+                 bazaar_branch_store, logger, opener_policy):
+        """Construct a `ToBzrImportWorker`.
+
+        :param source_details: A `CodeImportSourceDetails` object.
+        :param bazaar_branch_store: A `BazaarBranchStore`. The import worker
+            uses this to fetch and store the Bazaar branches that are created
+            and updated during the import process.
+        :param logger: A `Logger` to pass to cscvs.
+        :param opener_policy: Policy object that decides what branches can
+             be imported
+        """
+        super(ToBzrImportWorker, self).__init__(
+            source_details, logger, opener_policy)
+        self.bazaar_branch_store = bazaar_branch_store
+        self.import_data_store = ImportDataStore(
+            import_data_transport, self.source_details)
+
+    def getBazaarBranch(self):
+        """Return the Bazaar `Branch` that we are importing into."""
+        if os.path.isdir(self.BZR_BRANCH_PATH):
+            shutil.rmtree(self.BZR_BRANCH_PATH)
+        return self.bazaar_branch_store.pull(
+            self.source_details.target_id, self.BZR_BRANCH_PATH,
+            self.required_format, self.needs_bzr_tree,
+            stacked_on_url=self.source_details.stacked_on_url)
+
+    def pushBazaarBranch(self, bazaar_branch):
+        """Push the updated Bazaar branch to the server.
+
+        :return: True if revisions were transferred.
+        """
+        return self.bazaar_branch_store.push(
+            self.source_details.target_id, bazaar_branch,
+            self.required_format,
+            stacked_on_url=self.source_details.stacked_on_url)
+
+
+class CSCVSImportWorker(ToBzrImportWorker):
     """An ImportWorker for imports that use CSCVS.
 
     As well as invoking cscvs to do the import, this class also needs to
@@ -674,7 +692,7 @@
             return CodeImportWorkerExitCode.SUCCESS_NOCHANGE
 
 
-class PullingImportWorker(ImportWorker):
+class PullingImportWorker(ToBzrImportWorker):
     """An import worker for imports that can be done by a bzr plugin.
 
     Subclasses need to implement `probers`.
@@ -806,7 +824,7 @@
         return config.codeimport.git_revisions_import_limit
 
     def getBazaarBranch(self):
-        """See `ImportWorker.getBazaarBranch`.
+        """See `ToBzrImportWorker.getBazaarBranch`.
 
         In addition to the superclass' behaviour, we retrieve bzr-git's
         caches, both legacy and modern, from the import data store and put
@@ -828,7 +846,7 @@
         return branch
 
     def pushBazaarBranch(self, bazaar_branch):
-        """See `ImportWorker.pushBazaarBranch`.
+        """See `ToBzrImportWorker.pushBazaarBranch`.
 
         In addition to the superclass' behaviour, we store bzr-git's cache
         directory at .bzr/repository/git in the import data store.

=== modified file 'lib/lp/codehosting/codeimport/workermonitor.py'
--- lib/lp/codehosting/codeimport/workermonitor.py	2013-01-07 02:40:55 +0000
+++ lib/lp/codehosting/codeimport/workermonitor.py	2016-10-11 13:46:57 +0000
@@ -134,7 +134,7 @@
         self.codeimport_endpoint = codeimport_endpoint
         self._call_finish_job = True
         self._log_file = tempfile.TemporaryFile()
-        self._branch_url = None
+        self._target_url = None
         self._log_file_name = 'no-name-set.txt'
         self._access_policy = access_policy
 
@@ -143,7 +143,7 @@
         context = {
             'twisted_failure': failure,
             'http_request': errorlog.ScriptRequest(
-                [('code_import_job_id', self._job_id)], self._branch_url),
+                [('code_import_job_id', self._job_id)], self._target_url),
             }
         report = config.create(context)
 
@@ -169,15 +169,15 @@
     def getWorkerArguments(self):
         """Get arguments for the worker for the import we are working on.
 
-        This also sets the _branch_url and _log_file_name attributes for use
+        This also sets the _target_url and _log_file_name attributes for use
         in the _logOopsFromFailure and finishJob methods respectively.
         """
         deferred = self.codeimport_endpoint.callRemote(
             'getImportDataForJobID', self._job_id)
 
         def _processResult(result):
-            code_import_arguments, branch_url, log_file_name = result
-            self._branch_url = branch_url
+            code_import_arguments, target_url, log_file_name = result
+            self._target_url = target_url
             self._log_file_name = log_file_name
             self._logger.info(
                 'Found source details: %s', code_import_arguments)

=== modified file 'lib/lp/testing/factory.py'
--- lib/lp/testing/factory.py	2016-10-03 17:00:56 +0000
+++ lib/lp/testing/factory.py	2016-10-11 13:46:57 +0000
@@ -492,11 +492,11 @@
         epoch = datetime(2009, 1, 1, tzinfo=pytz.UTC)
         return epoch + timedelta(minutes=self.getUniqueInteger())
 
-    def makeCodeImportSourceDetails(self, branch_id=None, rcstype=None,
+    def makeCodeImportSourceDetails(self, target_id=None, rcstype=None,
                                     url=None, cvs_root=None, cvs_module=None,
                                     stacked_on_url=None):
-        if branch_id is None:
-            branch_id = self.getUniqueInteger()
+        if target_id is None:
+            target_id = self.getUniqueInteger()
         if rcstype is None:
             rcstype = 'bzr-svn'
         if rcstype in ['bzr-svn', 'bzr']:
@@ -516,7 +516,7 @@
         else:
             raise AssertionError("Unknown rcstype %r." % rcstype)
         return CodeImportSourceDetails(
-            branch_id, rcstype, url, cvs_root, cvs_module,
+            target_id, rcstype, url, cvs_root, cvs_module,
             stacked_on_url=stacked_on_url)
 
 


Follow ups