launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #25604
[Merge] ~cjwatson/launchpad:code-import-worker-blacklisted-hostnames into launchpad:master
Colin Watson has proposed merging ~cjwatson/launchpad:code-import-worker-blacklisted-hostnames into launchpad:master.
Commit message:
Add --exclude-host option to code import worker
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/393216
This allows the scheduler to pass information to the worker about which hosts should be excluded from imports, rather than requiring the worker to have access to appropriate configuration itself. Making this separation will make it easier to maintain a split-out lp-codeimport in the future.
--
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:code-import-worker-blacklisted-hostnames into launchpad:master.
diff --git a/lib/lp/codehosting/codeimport/tests/test_worker.py b/lib/lp/codehosting/codeimport/tests/test_worker.py
index 892ec3c..b6c2614 100644
--- a/lib/lp/codehosting/codeimport/tests/test_worker.py
+++ b/lib/lp/codehosting/codeimport/tests/test_worker.py
@@ -852,6 +852,15 @@ class TestActualImportMixin:
Override this in your subclass if you need it.
"""
+ def test_exclude_hosts(self):
+ details = CodeImportSourceDetails.fromArguments(
+ self.makeWorkerArguments(
+ 'trunk', [('README', b'Original contents')]) +
+ ['--exclude-host', 'bad.example.com',
+ '--exclude-host', 'worse.example.com'])
+ self.assertEqual(
+ ['bad.example.com', 'worse.example.com'], details.exclude_hosts)
+
def test_import(self):
# Running the worker on a branch that hasn't been imported yet imports
# the branch.
@@ -1465,6 +1474,14 @@ class CodeImportBranchOpenPolicyTests(TestCase):
self.assertGoodUrl("git://git.example.com/repo")
self.assertBadUrl("git://git.launchpad.test/example")
+ def test_check_one_url_exclude_hosts(self):
+ self.policy = CodeImportBranchOpenPolicy(
+ "bzr", "bzr",
+ exclude_hosts=["bad.example.com", "worse.example.com"])
+ self.assertGoodUrl("git://good.example.com/repo")
+ self.assertBadUrl("git://bad.example.com/repo")
+ self.assertBadUrl("git://worse.example.com/repo")
+
class RedirectTests(http_utils.TestCaseWithRedirectedWebserver, TestCase):
diff --git a/lib/lp/codehosting/codeimport/worker.py b/lib/lp/codehosting/codeimport/worker.py
index 6bf5d8d..2652b16 100644
--- a/lib/lp/codehosting/codeimport/worker.py
+++ b/lib/lp/codehosting/codeimport/worker.py
@@ -111,9 +111,10 @@ class CodeImportBranchOpenPolicy(BranchOpenPolicy):
allowed_schemes = ['http', 'https', 'svn', 'git', 'ftp', 'bzr']
- def __init__(self, rcstype, target_rcstype):
+ def __init__(self, rcstype, target_rcstype, exclude_hosts=None):
self.rcstype = rcstype
self.target_rcstype = target_rcstype
+ self.exclude_hosts = list(exclude_hosts or [])
def should_follow_references(self):
"""See `BranchOpenPolicy.should_follow_references`.
@@ -142,6 +143,12 @@ class CodeImportBranchOpenPolicy(BranchOpenPolicy):
uri = URI(url)
except InvalidURIError:
raise BadUrl(url)
+ for hostname in self.exclude_hosts:
+ if uri.underDomain(hostname):
+ raise BadUrl(url)
+ # XXX cjwatson 2020-10-20: These hostname checks require tight
+ # coupling with Launchpad. Remove them once the scheduler always
+ # passes exclusions via --exclude-host arguments.
if self.rcstype == self.target_rcstype:
launchpad_domain = config.vhost.mainsite.hostname
if uri.underDomain(launchpad_domain):
@@ -307,11 +314,13 @@ class CodeImportSourceDetails:
is stacked on, if any.
:ivar macaroon: A macaroon granting authority to push to the target
repository if target_rcstype == 'git', None otherwise.
+ :ivar exclude_hosts: A list of host names from which we should not
+ mirror branches, or None.
"""
def __init__(self, target_id, rcstype, target_rcstype, url=None,
cvs_root=None, cvs_module=None, stacked_on_url=None,
- macaroon=None):
+ macaroon=None, exclude_hosts=None):
self.target_id = target_id
self.rcstype = rcstype
self.target_rcstype = target_rcstype
@@ -320,6 +329,7 @@ class CodeImportSourceDetails:
self.cvs_module = cvs_module
self.stacked_on_url = stacked_on_url
self.macaroon = macaroon
+ self.exclude_hosts = exclude_hosts
@classmethod
def fromArguments(cls, arguments):
@@ -346,6 +356,9 @@ class CodeImportSourceDetails:
help=(
'Macaroon authorising push (only valid for target_rcstype '
'git)'))
+ parser.add_argument(
+ '--exclude-host', action='append', dest='exclude_hosts',
+ help='Refuse to mirror branches from this host')
args = parser.parse_args(arguments)
target_id = args.target_id
rcstype = args.rcstype
@@ -374,7 +387,7 @@ class CodeImportSourceDetails:
macaroon = args.macaroon
return cls(
target_id, rcstype, target_rcstype, url, cvs_root, cvs_module,
- stacked_on_url, macaroon)
+ stacked_on_url, macaroon, exclude_hosts=args.exclude_hosts)
class ImportDataStore:
diff --git a/scripts/code-import-worker.py b/scripts/code-import-worker.py
index cc96230..434b449 100755
--- a/scripts/code-import-worker.py
+++ b/scripts/code-import-worker.py
@@ -39,7 +39,9 @@ from lp.services.timeout import set_default_timeout_function
opener_policies = {
- "anything": lambda rcstype, target_rcstype: AcceptAnythingPolicy(),
+ "anything": (
+ lambda rcstype, target_rcstype, exclude_hosts=None:
+ AcceptAnythingPolicy()),
"default": CodeImportBranchOpenPolicy,
}
@@ -89,7 +91,8 @@ class CodeImportWorker:
raise AssertionError(
'unknown rcstype %r' % source_details.rcstype)
opener_policy = opener_policies[self.options.access_policy](
- source_details.rcstype, source_details.target_rcstype)
+ source_details.rcstype, source_details.target_rcstype,
+ exclude_hosts=source_details.exclude_hosts)
if source_details.target_rcstype == 'bzr':
import_worker = import_worker_cls(
source_details,