← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~cjwatson/launchpad:code-import-worker-blacklisted-hostnames into launchpad:master

 

Colin Watson has proposed merging ~cjwatson/launchpad:code-import-worker-blacklisted-hostnames into launchpad:master.

Commit message:
Add --exclude-host option to code import worker

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/393216

This allows the scheduler to pass information to the worker about which hosts should be excluded from imports, rather than requiring the worker to have access to appropriate configuration itself.  Making this separation will make it easier to maintain a split-out lp-codeimport in the future.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:code-import-worker-blacklisted-hostnames into launchpad:master.
diff --git a/lib/lp/codehosting/codeimport/tests/test_worker.py b/lib/lp/codehosting/codeimport/tests/test_worker.py
index 892ec3c..b6c2614 100644
--- a/lib/lp/codehosting/codeimport/tests/test_worker.py
+++ b/lib/lp/codehosting/codeimport/tests/test_worker.py
@@ -852,6 +852,15 @@ class TestActualImportMixin:
         Override this in your subclass if you need it.
         """
 
+    def test_exclude_hosts(self):
+        details = CodeImportSourceDetails.fromArguments(
+            self.makeWorkerArguments(
+                'trunk', [('README', b'Original contents')]) +
+            ['--exclude-host', 'bad.example.com',
+             '--exclude-host', 'worse.example.com'])
+        self.assertEqual(
+            ['bad.example.com', 'worse.example.com'], details.exclude_hosts)
+
     def test_import(self):
         # Running the worker on a branch that hasn't been imported yet imports
         # the branch.
@@ -1465,6 +1474,14 @@ class CodeImportBranchOpenPolicyTests(TestCase):
         self.assertGoodUrl("git://git.example.com/repo")
         self.assertBadUrl("git://git.launchpad.test/example")
 
+    def test_check_one_url_exclude_hosts(self):
+        self.policy = CodeImportBranchOpenPolicy(
+            "bzr", "bzr",
+            exclude_hosts=["bad.example.com", "worse.example.com"])
+        self.assertGoodUrl("git://good.example.com/repo")
+        self.assertBadUrl("git://bad.example.com/repo")
+        self.assertBadUrl("git://worse.example.com/repo")
+
 
 class RedirectTests(http_utils.TestCaseWithRedirectedWebserver, TestCase):
 
diff --git a/lib/lp/codehosting/codeimport/worker.py b/lib/lp/codehosting/codeimport/worker.py
index 6bf5d8d..2652b16 100644
--- a/lib/lp/codehosting/codeimport/worker.py
+++ b/lib/lp/codehosting/codeimport/worker.py
@@ -111,9 +111,10 @@ class CodeImportBranchOpenPolicy(BranchOpenPolicy):
 
     allowed_schemes = ['http', 'https', 'svn', 'git', 'ftp', 'bzr']
 
-    def __init__(self, rcstype, target_rcstype):
+    def __init__(self, rcstype, target_rcstype, exclude_hosts=None):
         self.rcstype = rcstype
         self.target_rcstype = target_rcstype
+        self.exclude_hosts = list(exclude_hosts or [])
 
     def should_follow_references(self):
         """See `BranchOpenPolicy.should_follow_references`.
@@ -142,6 +143,12 @@ class CodeImportBranchOpenPolicy(BranchOpenPolicy):
             uri = URI(url)
         except InvalidURIError:
             raise BadUrl(url)
+        for hostname in self.exclude_hosts:
+            if uri.underDomain(hostname):
+                raise BadUrl(url)
+        # XXX cjwatson 2020-10-20: These hostname checks require tight
+        # coupling with Launchpad.  Remove them once the scheduler always
+        # passes exclusions via --exclude-host arguments.
         if self.rcstype == self.target_rcstype:
             launchpad_domain = config.vhost.mainsite.hostname
             if uri.underDomain(launchpad_domain):
@@ -307,11 +314,13 @@ class CodeImportSourceDetails:
         is stacked on, if any.
     :ivar macaroon: A macaroon granting authority to push to the target
         repository if target_rcstype == 'git', None otherwise.
+    :ivar exclude_hosts: A list of host names from which we should not
+        mirror branches, or None.
     """
 
     def __init__(self, target_id, rcstype, target_rcstype, url=None,
                  cvs_root=None, cvs_module=None, stacked_on_url=None,
-                 macaroon=None):
+                 macaroon=None, exclude_hosts=None):
         self.target_id = target_id
         self.rcstype = rcstype
         self.target_rcstype = target_rcstype
@@ -320,6 +329,7 @@ class CodeImportSourceDetails:
         self.cvs_module = cvs_module
         self.stacked_on_url = stacked_on_url
         self.macaroon = macaroon
+        self.exclude_hosts = exclude_hosts
 
     @classmethod
     def fromArguments(cls, arguments):
@@ -346,6 +356,9 @@ class CodeImportSourceDetails:
             help=(
                 'Macaroon authorising push (only valid for target_rcstype '
                 'git)'))
+        parser.add_argument(
+            '--exclude-host', action='append', dest='exclude_hosts',
+            help='Refuse to mirror branches from this host')
         args = parser.parse_args(arguments)
         target_id = args.target_id
         rcstype = args.rcstype
@@ -374,7 +387,7 @@ class CodeImportSourceDetails:
             macaroon = args.macaroon
         return cls(
             target_id, rcstype, target_rcstype, url, cvs_root, cvs_module,
-            stacked_on_url, macaroon)
+            stacked_on_url, macaroon, exclude_hosts=args.exclude_hosts)
 
 
 class ImportDataStore:
diff --git a/scripts/code-import-worker.py b/scripts/code-import-worker.py
index cc96230..434b449 100755
--- a/scripts/code-import-worker.py
+++ b/scripts/code-import-worker.py
@@ -39,7 +39,9 @@ from lp.services.timeout import set_default_timeout_function
 
 
 opener_policies = {
-    "anything": lambda rcstype, target_rcstype: AcceptAnythingPolicy(),
+    "anything": (
+        lambda rcstype, target_rcstype, exclude_hosts=None:
+        AcceptAnythingPolicy()),
     "default": CodeImportBranchOpenPolicy,
     }
 
@@ -89,7 +91,8 @@ class CodeImportWorker:
             raise AssertionError(
                 'unknown rcstype %r' % source_details.rcstype)
         opener_policy = opener_policies[self.options.access_policy](
-            source_details.rcstype, source_details.target_rcstype)
+            source_details.rcstype, source_details.target_rcstype,
+            exclude_hosts=source_details.exclude_hosts)
         if source_details.target_rcstype == 'bzr':
             import_worker = import_worker_cls(
                 source_details,