← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~jameinel/launchpad/loggerhead-disconnect-701329 into lp:launchpad

 

You have been requested to review the proposed merge of lp:~jameinel/launchpad/loggerhead-disconnect-701329 into lp:launchpad.

For more details, see:
https://code.launchpad.net/~jameinel/launchpad/loggerhead-disconnect-701329/+merge/48665

This should fix the 16k OOPS reports that we were seeing on loggerhead. There are probably other ways to fix it, as well.

The failure was being triggered because haproxy is issuing a HEAD request to loggerhead, and then closing before it actually reads all of the result. loggerhead ends up getting socket.error(EPIPE) while trying to write information out.

I wasn't able to trigger this by going to the Apache proxy interface, presumably because Apache reads the full header from loggerhead before it then reports things back to the user.

In the WSGIHandler code, it is suppressing all of these socket errors (SSL error, socket.error, etc). I went ahead and followed suit, to suppress them instead of reporting an oops.

If you consider the code as it exists, it would want to give OOPS info to the user. However, if the socket is closed, you can't send anything anyway.

If we wanted, we could have a more fine-grained approach here (only suppress socket.error(EPIPE), or something like that.) We could also just trap at a different level. For example in the 'write_wrapper' function.

-- 
https://code.launchpad.net/~jameinel/launchpad/loggerhead-disconnect-701329/+merge/48665
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~jameinel/launchpad/loggerhead-disconnect-701329 into lp:launchpad.
=== modified file 'lib/launchpad_loggerhead/app.py'
--- lib/launchpad_loggerhead/app.py	2011-01-31 23:45:55 +0000
+++ lib/launchpad_loggerhead/app.py	2011-02-09 00:47:36 +0000
@@ -18,6 +18,7 @@
 from openid.extensions.sreg import SRegRequest, SRegResponse
 from openid.consumer.consumer import CANCEL, Consumer, FAILURE, SUCCESS
 
+from paste import httpserver
 from paste.fileapp import DataApp
 from paste.request import construct_url, parse_querystring, path_info_pop
 from paste.httpexceptions import (
@@ -336,29 +337,32 @@
     error_utility = make_error_utility()
     def wrapped_app(environ, start_response):
         wrapped = WrappedStartResponse(start_response)
-        # Start processing this request
         try:
+            # Start processing this request, build the app
             app_iter = iter(app(environ, wrapped.start_response))
+            # Start yielding the response
+            while True:
+                try:
+                    data = app_iter.next()
+                except StopIteration:
+                    return
+                if not wrapped.body_started:
+                    wrapped.really_start()
+                yield data
+        except httpserver.SocketErrors, e:
+            # The Paste WSGIHandler suppresses these exceptions.
+            # Generally it means something like 'EPIPE' because the
+            # connection was closed. We don't want to generate an OOPS
+            # just because the connection was closed prematurely.
+            logger = logging.getLogger('lp-loggerhead')
+            logger.info('Caught socket exception from %s: %s %s'
+                         % (environ.get('REMOTE_ADDR', '<unknown>'),
+                            e.__class__, e,))
+            return
         except:
             error_page_sent = wrapped.generate_oops(environ, error_utility)
             if error_page_sent:
                 return
             # Could not send error page to user, so... just give up.
             raise
-        # Start yielding the response
-        while True:
-            try:
-                data = app_iter.next()
-            except StopIteration:
-                return
-            except:
-                error_page_sent = wrapped.generate_oops(environ, error_utility)
-                if error_page_sent:
-                    return
-                # Could not send error page to user, so... just give up.
-                raise
-            else:
-                if not wrapped.body_started:
-                    wrapped.really_start()
-                yield data
     return wrapped_app

=== modified file 'lib/launchpad_loggerhead/tests.py'
--- lib/launchpad_loggerhead/tests.py	2010-10-04 19:50:45 +0000
+++ lib/launchpad_loggerhead/tests.py	2011-02-09 00:47:36 +0000
@@ -1,19 +1,27 @@
 # Copyright 2010 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
+import cStringIO
+import errno
+import logging
 import unittest
 import urllib
+import socket
+import re
 
 import lazr.uri
 import wsgi_intercept
 from wsgi_intercept.urllib2_intercept import install_opener, uninstall_opener
 import wsgi_intercept.zope_testbrowser
+from paste import httpserver
 from paste.httpexceptions import HTTPExceptionHandler
+import zope.event
 
 from canonical.config import config
+from canonical.launchpad.webapp.errorlog import ErrorReport, ErrorReportEvent
 from canonical.launchpad.webapp.vhosts import allvhosts
 from canonical.testing.layers import DatabaseFunctionalLayer
-from launchpad_loggerhead.app import RootApp
+from launchpad_loggerhead.app import RootApp, oops_middleware
 from launchpad_loggerhead.session import SessionHandler
 from lp.testing import TestCase
 
@@ -142,5 +150,110 @@
                          'This is a dummy destination.\n')
 
 
+class TestOopsMiddleware(TestCase):
+
+    def assertContainsRe(self, haystack, needle_re, flags=0):
+        """Assert that a contains something matching a regular expression."""
+        # There is: self.assertTextMatchesExpressionIgnoreWhitespace
+        #           but it does weird things with whitespace, and gives
+        #           unhelpful error messages when it fails, so this is copied
+        #           from bzrlib
+        if not re.search(needle_re, haystack, flags):
+            if '\n' in haystack or len(haystack) > 60:
+                # a long string, format it in a more readable way
+                raise AssertionError(
+                        'pattern "%s" not found in\n"""\\\n%s"""\n'
+                        % (needle_re, haystack))
+            else:
+                raise AssertionError('pattern "%s" not found in "%s"'
+                        % (needle_re, haystack))
+
+    def catchLogEvents(self):
+        """Any log events that are triggered get written to self.log_stream"""
+        logger = logging.getLogger('lp-loggerhead')
+        logger.setLevel(logging.DEBUG)
+        self.log_stream = cStringIO.StringIO()
+        handler = logging.StreamHandler(self.log_stream)
+        handler.setLevel(logging.DEBUG)
+        logger.addHandler(handler)
+        self.addCleanup(logger.removeHandler, handler)
+
+    def _eventTriggered(self, event):
+        if isinstance(event, ErrorReportEvent):
+            self.error_events.append(event)
+
+    def watchForErrorReportEvent(self):
+        self.error_events = []
+        zope.event.subscribers.append(self._eventTriggered)
+        self.addCleanup(zope.event.subscribers.remove, self._eventTriggered)
+
+    def runtime_failing_app(self, environ, start_response):
+        if False:
+            yield None
+        raise RuntimeError('just a generic runtime error.')
+
+    def socket_failing_app(self, environ, start_response):
+        if False:
+            yield None
+        raise socket.error(errno.EPIPE, 'Connection closed')
+
+    def noop_start_response(self, status, response_headers, exc_info=None):
+        def noop_write(chunk):
+            pass
+        return noop_write
+
+    def success_app(self, environ, start_response):
+        writer = start_response('200 OK', {})
+        writer('Successfull\n')
+        return []
+
+    def failing_start_response(self, status, response_headers, exc_info=None):
+        def fail_write(chunk):
+            raise socket.error(errno.EPIPE, 'Connection closed')
+        return fail_write
+
+    def wrap_and_run(self, app, failing_write=False):
+        self.watchForErrorReportEvent()
+        app = oops_middleware(app)
+        # Just random env data, rather than setting up a whole wsgi stack just
+        # to pass in values for this dict
+        environ = {'wsgi.version': (1, 0),
+                   'wsgi.url_scheme': 'http',
+                   'PATH_INFO': '/test/path',
+                   'REQUEST_METHOD': 'GET',
+                   'SERVER_NAME': 'localhost',
+                   'SERVER_PORT': '8080',
+                  }
+        if failing_write:
+            result = list(app(environ, self.failing_start_response))
+        else:
+            result = list(app(environ, self.noop_start_response))
+        return result
+
+    def test_exception_triggers_oops(self):
+        res = self.wrap_and_run(self.runtime_failing_app)
+        # After the exception was raised, we should also have gotten an oops
+        # event
+        self.assertEqual(1, len(self.error_events))
+        event = self.error_events[0]
+        self.assertIsInstance(event, ErrorReportEvent)
+        self.assertIsInstance(event.object, ErrorReport)
+        self.assertEqual('RuntimeError', event.object.type)
+
+    def test_ignores_socket_exceptions(self):
+        self.catchLogEvents()
+        res = self.wrap_and_run(self.socket_failing_app)
+        self.assertEqual(0, len(self.error_events))
+        self.assertContainsRe(self.log_stream.getvalue(),
+            'Caught socket exception from <unknown>:.*Connection closed')
+
+    def test_ignores_writer_failures(self):
+        self.catchLogEvents()
+        res = self.wrap_and_run(self.success_app, failing_write=True)
+        self.assertEqual(0, len(self.error_events))
+        self.assertContainsRe(self.log_stream.getvalue(),
+            'Caught socket exception from <unknown>:.*Connection closed')
+
+
 def test_suite():
     return unittest.TestLoader().loadTestsFromName(__name__)


References