← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~cjwatson/launchpad/reduce-webhook-retries into lp:launchpad

 

Colin Watson has proposed merging lp:~cjwatson/launchpad/reduce-webhook-retries into lp:launchpad.

Commit message:
Don't retry webhook deliveries that respond with 4xx.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/reduce-webhook-retries/+merge/365873

These are generally permanent errors due to some kind of misconfiguration, and we don't want them clogging up our job queue.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/reduce-webhook-retries into lp:launchpad.
=== modified file 'lib/lp/services/webhooks/model.py'
--- lib/lp/services/webhooks/model.py	2017-03-28 14:28:34 +0000
+++ lib/lp/services/webhooks/model.py	2019-04-11 18:41:24 +0000
@@ -1,4 +1,4 @@
-# Copyright 2015-2017 Canonical Ltd.  This software is licensed under the
+# Copyright 2015-2019 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 __metaclass__ = type
@@ -467,6 +467,17 @@
         self.queue()
 
     @property
+    def should_retry(self):
+        if 'result' not in self.json_data:
+            return False
+        if self.json_data['result'].get('webhook_deactivated'):
+            return False
+        if self.json_data['result'].get('connection_error') is not None:
+            return True
+        status_code = self.json_data['result']['response']['status_code']
+        return 500 <= status_code <= 599
+
+    @property
     def retry_automatically(self):
         return self._time_since_first_attempt < timedelta(days=1)
 
@@ -510,7 +521,7 @@
         transaction.commit()
 
         if not self.successful:
-            if self.retry_automatically:
+            if self.should_retry and self.retry_automatically:
                 raise WebhookDeliveryRetry()
             else:
                 raise WebhookDeliveryFailure(self.error_message)

=== modified file 'lib/lp/services/webhooks/tests/test_job.py'
--- lib/lp/services/webhooks/tests/test_job.py	2018-05-31 10:23:03 +0000
+++ lib/lp/services/webhooks/tests/test_job.py	2019-04-11 18:41:24 +0000
@@ -1,4 +1,4 @@
-# Copyright 2015-2018 Canonical Ltd.  This software is licensed under the
+# Copyright 2015-2019 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Tests for `WebhookJob`s."""
@@ -366,10 +366,11 @@
             job,
             MatchesStructure(
                 status=Equals(JobStatus.COMPLETED),
-                pending=Equals(False),
-                successful=Equals(True),
+                pending=Is(False),
+                successful=Is(True),
                 date_sent=Not(Is(None)),
                 error_message=Is(None),
+                should_retry=Is(False),
                 json_data=ContainsDict(
                     {'result': MatchesAll(
                         KeysEqual('request', 'response'),
@@ -406,18 +407,19 @@
         self.assertEqual([], oopses.oopses)
 
     def test_run_404(self):
-        # A request that returns a non-2xx response is a failure and
-        # gets retried.
+        # A request that returns a non-2xx/5xx response is a failure and
+        # does not get retried.
         with CaptureOops() as oopses:
             job, reqs = self.makeAndRunJob(response_status=404)
         self.assertThat(
             job,
             MatchesStructure(
-                status=Equals(JobStatus.WAITING),
-                pending=Equals(True),
-                successful=Equals(False),
+                status=Equals(JobStatus.FAILED),
+                pending=Is(False),
+                successful=Is(False),
                 date_sent=Not(Is(None)),
                 error_message=Equals('Bad HTTP response: 404'),
+                should_retry=Is(False),
                 json_data=ContainsDict(
                     {'result': MatchesAll(
                         KeysEqual('request', 'response'),
@@ -427,6 +429,29 @@
         self.assertEqual(1, len(reqs))
         self.assertEqual([], oopses.oopses)
 
+    def test_run_503(self):
+        # A request that returns a 5xx response is a failure and gets
+        # retried.
+        with CaptureOops() as oopses:
+            job, reqs = self.makeAndRunJob(response_status=503)
+        self.assertThat(
+            job,
+            MatchesStructure(
+                status=Equals(JobStatus.WAITING),
+                pending=Is(True),
+                successful=Is(False),
+                date_sent=Not(Is(None)),
+                error_message=Equals('Bad HTTP response: 503'),
+                should_retry=Is(True),
+                json_data=ContainsDict(
+                    {'result': MatchesAll(
+                        KeysEqual('request', 'response'),
+                        ContainsDict(
+                            {'response': ContainsDict(
+                                {'status_code': Equals(503)})}))})))
+        self.assertEqual(1, len(reqs))
+        self.assertEqual([], oopses.oopses)
+
     def test_run_connection_error(self):
         # Jobs that fail to connect have a connection_error rather than a
         # response. They too trigger a retry.
@@ -437,10 +462,11 @@
             job,
             MatchesStructure(
                 status=Equals(JobStatus.WAITING),
-                pending=Equals(True),
-                successful=Equals(False),
+                pending=Is(True),
+                successful=Is(False),
                 date_sent=Not(Is(None)),
                 error_message=Equals('Connection error: Connection refused'),
+                should_retry=Is(True),
                 json_data=ContainsDict(
                     {'result': MatchesAll(
                         KeysEqual('request', 'connection_error'),
@@ -462,10 +488,11 @@
             job,
             MatchesStructure(
                 status=Equals(JobStatus.FAILED),
-                pending=Equals(False),
+                pending=Is(False),
                 successful=Is(None),
                 date_sent=Is(None),
                 error_message=Is(None),
+                should_retry=Is(False),
                 json_data=Not(Contains('result'))))
         self.assertEqual([], reqs)
         self.assertEqual(1, len(oopses.oopses))
@@ -483,18 +510,19 @@
             job,
             MatchesStructure(
                 status=Equals(JobStatus.FAILED),
-                pending=Equals(False),
-                successful=Equals(False),
+                pending=Is(False),
+                successful=Is(False),
                 date_sent=Is(None),
                 error_message=Equals('Webhook deactivated'),
+                should_retry=Is(False),
                 json_data=ContainsDict(
                     {'result': MatchesDict(
-                        {'webhook_deactivated': Equals(True)})})))
+                        {'webhook_deactivated': Is(True)})})))
         self.assertEqual([], reqs)
         self.assertEqual([], oopses.oopses)
 
     def test_date_first_sent(self):
-        job, reqs = self.makeAndRunJob(response_status=404)
+        job, reqs = self.makeAndRunJob(response_status=503)
         self.assertEqual(job.date_first_sent, job.date_sent)
         orig_first_sent = job.date_first_sent
         self.assertEqual(JobStatus.WAITING, job.status)
@@ -551,7 +579,7 @@
     def test_automatic_retries(self):
         hook = self.factory.makeWebhook()
         job = WebhookDeliveryJob.create(hook, 'test', payload={'foo': 'bar'})
-        client = MockWebhookClient(response_status=404)
+        client = MockWebhookClient(response_status=503)
         self.useFixture(ZopeUtilityFixture(client, IWebhookClient))
 
         # The first attempt fails but schedules a retry five minutes later.
@@ -585,7 +613,7 @@
     def test_manual_retries(self):
         hook = self.factory.makeWebhook()
         job = WebhookDeliveryJob.create(hook, 'test', payload={'foo': 'bar'})
-        client = MockWebhookClient(response_status=404)
+        client = MockWebhookClient(response_status=503)
         self.useFixture(ZopeUtilityFixture(client, IWebhookClient))
 
         # Simulate a first attempt failure.
@@ -633,7 +661,7 @@
         # systemic errors that erroneously failed many deliveries.
         hook = self.factory.makeWebhook()
         job = WebhookDeliveryJob.create(hook, 'test', payload={'foo': 'bar'})
-        client = MockWebhookClient(response_status=404)
+        client = MockWebhookClient(response_status=503)
         self.useFixture(ZopeUtilityFixture(client, IWebhookClient))
 
         # Simulate a first attempt failure.


Follow ups