← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~cjwatson/launchpad:vendor-html5browser into launchpad:master

 

Colin Watson has proposed merging ~cjwatson/launchpad:vendor-html5browser into launchpad:master.

Commit message:
Import an improved version of html5browser

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/377186

This is based on lp:html5-browser, with various improvements.

We use this for YUI tests, but the Launchpad and PyPI projects for it have a single maintainer who doesn't seem responsive, and we need to fix some deprecation warnings and do some other maintenance.  Fortunately it's a thin wrapper around WebKit, so the simplest approach is to just take our own copy and fix it up.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:vendor-html5browser into launchpad:master.
diff --git a/constraints.txt b/constraints.txt
index e96e5e3..a180729 100644
--- a/constraints.txt
+++ b/constraints.txt
@@ -208,7 +208,6 @@ futures==3.2.0
 geoip2==2.9.0
 grokcore.component==1.6
 gunicorn==19.8.1
-html5browser==0.0.9
 httplib2==0.8
 hyperlink==18.0.0
 importlib==1.0.2
diff --git a/lib/lp/testing/__init__.py b/lib/lp/testing/__init__.py
index c3cf774..85ffd79 100644
--- a/lib/lp/testing/__init__.py
+++ b/lib/lp/testing/__init__.py
@@ -1097,7 +1097,7 @@ class AbstractYUITestCase(TestCase):
         super(AbstractYUITestCase, self).setUp()
         # html5browser imports from the gir/pygtk stack which causes
         # twisted tests to break because of gtk's initialize.
-        import html5browser
+        from lp.testing import html5browser
         client = html5browser.Browser()
         page = client.load_page(self.html_uri,
                                 timeout=self.suite_timeout,
diff --git a/lib/lp/testing/html5browser.py b/lib/lp/testing/html5browser.py
new file mode 100644
index 0000000..e6099e1
--- /dev/null
+++ b/lib/lp/testing/html5browser.py
@@ -0,0 +1,205 @@
+# Copyright (C) 2011 - Curtis Hovey <sinzui.is at verizon.net>
+# Copyright 2020 Canonical Ltd.
+#
+# This software is licensed under the MIT license:
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+"""A Web browser that can be driven by an application."""
+
+__metaclass__ = type
+__all__ = [
+    'Browser',
+    'Command',
+    ]
+
+import gi
+
+gi.require_version('Gtk', '3.0')
+gi.require_version('WebKit', '3.0')
+
+from gi.repository import (
+    GLib,
+    Gtk,
+    WebKit,
+    )
+
+
+class Command:
+    """A representation of the status and result of a command."""
+    STATUS_RUNNING = object()
+    STATUS_COMPLETE = object()
+
+    CODE_UNKNOWN = -1
+    CODE_SUCCESS = 0
+    CODE_FAIL = 1
+
+    def __init__(self, status=STATUS_RUNNING, return_code=CODE_UNKNOWN,
+                 content=None):
+        self.status = status
+        self.return_code = return_code
+        self.content = content
+
+
+class Browser(WebKit.WebView):
+    """A browser that can be driven by an application."""
+
+    STATUS_PREFIX = '::::'
+    TIMEOUT = 5000
+    INCREMENTAL_PREFIX = '>>>>'
+    INITIAL_TIMEOUT = None
+    INCREMENTAL_TIMEOUT = None
+
+    def __init__(self, show_window=False, hide_console_messages=True):
+        super(Browser, self).__init__()
+        self.show_window = show_window
+        self.hide_console_messages = hide_console_messages
+        self.browser_window = None
+        self.script = None
+        self.command = None
+        self.listeners = {}
+        self._connect('console-message', self._on_console_message, False)
+
+    def load_page(self, uri,
+                  timeout=TIMEOUT,
+                  initial_timeout=INITIAL_TIMEOUT,
+                  incremental_timeout=INCREMENTAL_TIMEOUT):
+        """Load a page and return the content."""
+        self._setup_listening_operation(
+            timeout, initial_timeout, incremental_timeout)
+        if uri.startswith('/'):
+            uri = 'file://' + uri
+        self.load_uri(uri)
+        Gtk.main()
+        return self.command
+
+    def run_script(self, script,
+                   timeout=TIMEOUT,
+                   initial_timeout=INITIAL_TIMEOUT,
+                   incremental_timeout=INCREMENTAL_TIMEOUT):
+        """Run a script and return the result."""
+        self._setup_listening_operation(
+            timeout, initial_timeout, incremental_timeout)
+        self.script = script
+        self._connect('notify::load-status', self._on_script_load_finished)
+        self.load_string(
+            '<html><head></head><body></body></html>',
+            'text/html', 'UTF-8', 'file:///')
+        Gtk.main()
+        return self.command
+
+    def _setup_listening_operation(self, timeout, initial_timeout,
+                                   incremental_timeout):
+        """Set up a one-time listening operation for command completion."""
+        self._create_window()
+        self.command = Command()
+        self._last_status = None
+        self._incremental_timeout = incremental_timeout
+        self._connect(
+            'status-bar-text-changed', self._on_status_bar_text_changed)
+        self._timeout_source = GLib.timeout_add(timeout, self._on_timeout)
+        if initial_timeout is None:
+            initial_timeout = incremental_timeout
+        if initial_timeout is not None:
+            self._incremental_timeout_source = GLib.timeout_add(
+                initial_timeout, self._on_timeout)
+        else:
+            self._incremental_timeout_source = None
+
+    def _create_window(self):
+        """Create a window needed to render pages."""
+        if self.browser_window is not None:
+            return
+        self.browser_window = Gtk.Window()
+        self.browser_window.set_default_size(800, 600)
+        self.browser_window.connect("destroy", self._on_quit)
+        scrolled = Gtk.ScrolledWindow()
+        scrolled.add(self)
+        self.browser_window.add(scrolled)
+        if self.show_window:
+            self.browser_window.show_all()
+
+    def _on_quit(self, widget=None):
+        Gtk.main_quit()
+
+    def _clear_status(self):
+        self.execute_script('window.status = "";')
+
+    def _on_status_bar_text_changed(self, view, text):
+        if text.startswith(self.INCREMENTAL_PREFIX):
+            self._clear_incremental_timeout()
+            self._clear_status()
+            self._last_status = text[4:]
+            if self._incremental_timeout:
+                self._incremental_timeout_source = GLib.timeout_add(
+                    self._incremental_timeout, self._on_timeout)
+        elif text.startswith(self.STATUS_PREFIX):
+            self._clear_timeout()
+            self._clear_incremental_timeout()
+            self._disconnect('status-bar-text-changed')
+            self._clear_status()
+            self.command.status = Command.STATUS_COMPLETE
+            self.command.return_code = Command.CODE_SUCCESS
+            self.command.content = text[4:]
+            self._on_quit()
+
+    def _on_script_load_finished(self, view, load_status):
+        # pywebkit does not have WebKit.LoadStatus.FINISHED.
+        statuses = ('WEBKIT_LOAD_FINISHED', 'WEBKIT_LOAD_FAILED')
+        if self.props.load_status.value_name not in statuses:
+            return
+        self._disconnect('notify::load-status')
+        self.execute_script(self.script)
+        self.script = None
+
+    def _clear_incremental_timeout(self):
+        if self._incremental_timeout_source is not None:
+            GLib.source_remove(self._incremental_timeout_source)
+            self._incremental_timeout_source = None
+
+    def _clear_timeout(self):
+        if self._timeout_source is not None:
+            GLib.source_remove(self._timeout_source)
+            self._timeout_source = None
+
+    def _on_timeout(self):
+        self._clear_timeout()
+        self._clear_incremental_timeout()
+        if self.command.status is not Command.STATUS_COMPLETE:
+            self._disconnect()
+            self.command.status = Command.STATUS_COMPLETE
+            self.command.return_code = Command.CODE_FAIL
+            self.command.content = self._last_status
+            self._on_quit()
+        return False
+
+    def _on_console_message(self, view, message, line_no, source_id, data):
+        return self.hide_console_messages
+
+    def _connect(self, signal, callback, *args):
+        self.listeners[signal] = self.connect(signal, callback, *args)
+
+    def _disconnect(self, signal=None):
+        if signal is None:
+            signals = self.listeners.keys()
+        elif isinstance(signal, str):
+            signals = [signal]
+        for key in signals:
+            self.disconnect(self.listeners[key])
+            del self.listeners[key]
diff --git a/lib/lp/testing/tests/test_html5browser.py b/lib/lp/testing/tests/test_html5browser.py
new file mode 100644
index 0000000..240b618
--- /dev/null
+++ b/lib/lp/testing/tests/test_html5browser.py
@@ -0,0 +1,233 @@
+# Copyright (C) 2011 - Curtis Hovey <sinzui.is at verizon.net>
+# Copyright 2020 Canonical Ltd.
+#
+# This software is licensed under the MIT license:
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+from tempfile import NamedTemporaryFile
+
+from lp.testing import TestCase
+from lp.testing.html5browser import (
+    Command,
+    Browser,
+    )
+
+
+load_page_set_window_status_returned = """\
+    <html><head>
+    <script type="text/javascript">
+    window.status = '::::fnord';
+    </script>
+    </head><body></body></html>
+    """
+
+incremental_timeout_page = """\
+    <html><head>
+    <script type="text/javascript">
+    window.status = '>>>>shazam';
+    </script>
+    </head><body></body></html>
+    """
+
+
+load_page_set_window_status_ignores_non_commands = """\
+    <html><head>
+    <script type="text/javascript">
+    window.status = 'snarf';
+    </script>
+    </head><body>
+    <script type="text/javascript">
+    window.status = '::::pting';
+    </script>
+    </body></html>
+    """
+
+timeout_page = """\
+    <html><head></head><body></body></html>
+    """
+
+initial_long_wait_page = """\
+    <html><head>
+    <script type="text/javascript">
+    setTimeout(function() {
+      window.status = '>>>>initial';
+      setTimeout(function() {window.status = '::::ended'}, 200);
+    }, 1000);
+    </script>
+    </head><body></body></html>"""
+
+
+class BrowserTestCase(TestCase):
+    """Verify Browser methods."""
+
+    def setUp(self):
+        super(BrowserTestCase, self).setUp()
+        self.file = NamedTemporaryFile(prefix='html5browser_', suffix='.html')
+        self.addCleanup(self.file.close)
+
+    def test_init_default(self):
+        browser = Browser()
+        self.assertFalse(browser.show_window)
+        self.assertTrue(browser.hide_console_messages)
+        self.assertIsNone(browser.command)
+        self.assertIsNone(browser.script)
+        self.assertIsNone(browser.browser_window)
+        self.assertEqual(['console-message'], browser.listeners.keys())
+
+    def test_init_show_browser(self):
+        # The Browser can be set to show the window.
+        browser = Browser(show_window=True)
+        self.assertTrue(browser.show_window)
+
+    def test_load_page_set_window_status_returned(self):
+        # When window status is set with leading ::::, the command ends.
+        self.file.write(load_page_set_window_status_returned)
+        self.file.flush()
+        browser = Browser()
+        command = browser.load_page(self.file.name)
+        self.assertEqual(Command.STATUS_COMPLETE, command.status)
+        self.assertEqual(Command.CODE_SUCCESS, command.return_code)
+        self.assertEqual('fnord', command.content)
+        self.assertEqual('::::', Browser.STATUS_PREFIX)
+
+    def test_load_page_set_window_status_ignored_non_commands(self):
+        # Setting window status without a leading :::: is ignored.
+        self.file.write(load_page_set_window_status_ignores_non_commands)
+        self.file.flush()
+        browser = Browser()
+        command = browser.load_page(self.file.name)
+        self.assertEqual(Command.STATUS_COMPLETE, command.status)
+        self.assertEqual(Command.CODE_SUCCESS, command.return_code)
+        self.assertEqual('pting', command.content)
+
+    def test_load_page_initial_timeout(self):
+        # If an initial_timeout is set, it can cause a timeout.
+        self.file.write(timeout_page)
+        self.file.flush()
+        browser = Browser()
+        command = browser.load_page(
+            self.file.name, initial_timeout=1000, timeout=30000)
+        self.assertEqual(Command.STATUS_COMPLETE, command.status)
+        self.assertEqual(Command.CODE_FAIL, command.return_code)
+
+    def test_load_page_incremental_timeout(self):
+        # If an incremental_timeout is set, it can cause a timeout.
+        self.file.write(timeout_page)
+        self.file.flush()
+        browser = Browser()
+        command = browser.load_page(
+            self.file.name, incremental_timeout=1000, timeout=30000)
+        self.assertEqual(Command.STATUS_COMPLETE, command.status)
+        self.assertEqual(Command.CODE_FAIL, command.return_code)
+
+    def test_load_page_initial_timeout_has_precedence_first(self):
+        # If both an initial_timeout and an incremental_timeout are set,
+        # initial_timeout takes precedence for the first wait.
+        self.file.write(initial_long_wait_page)
+        self.file.flush()
+        browser = Browser()
+        command = browser.load_page(
+            self.file.name, initial_timeout=3000,
+            incremental_timeout=500, timeout=30000)
+        self.assertEqual(Command.STATUS_COMPLETE, command.status)
+        self.assertEqual(Command.CODE_SUCCESS, command.return_code)
+        self.assertEqual('ended', command.content)
+
+    def test_load_page_incremental_timeout_has_precedence_second(self):
+        # If both an initial_timeout and an incremental_timeout are set,
+        # incremental_timeout takes precedence for the second wait.
+        self.file.write(initial_long_wait_page)
+        self.file.flush()
+        browser = Browser()
+        command = browser.load_page(
+            self.file.name, initial_timeout=3000,
+            incremental_timeout=100, timeout=30000)
+        self.assertEqual(Command.STATUS_COMPLETE, command.status)
+        self.assertEqual(Command.CODE_FAIL, command.return_code)
+        self.assertEqual('initial', command.content)
+
+    def test_load_page_timeout_always_wins(self):
+        # If timeout, initial_timeout, and incremental_timeout are set,
+        # the main timeout will still be honored.
+        self.file.write(initial_long_wait_page)
+        self.file.flush()
+        browser = Browser()
+        command = browser.load_page(
+            self.file.name, initial_timeout=3000,
+            incremental_timeout=3000, timeout=100)
+        self.assertEqual(Command.STATUS_COMPLETE, command.status)
+        self.assertEqual(Command.CODE_FAIL, command.return_code)
+        self.assertIsNone(command.content)
+
+    def test_load_page_default_timeout_values(self):
+        # Verify our expected class defaults.
+        self.assertEqual(5000, Browser.TIMEOUT)
+        self.assertIsNone(Browser.INITIAL_TIMEOUT)
+        self.assertIsNone(Browser.INCREMENTAL_TIMEOUT)
+
+    def test_load_page_timeout(self):
+        # A page that does not set window.status within `timeout` times out.
+        self.file.write(timeout_page)
+        self.file.flush()
+        browser = Browser()
+        command = browser.load_page(self.file.name, timeout=1000)
+        self.assertEqual(Command.STATUS_COMPLETE, command.status)
+        self.assertEqual(Command.CODE_FAIL, command.return_code)
+
+    def test_load_page_set_window_status_incremental_timeout(self):
+        # Any incremental information is returned on a timeout.
+        self.file.write(incremental_timeout_page)
+        self.file.flush()
+        browser = Browser()
+        command = browser.load_page(self.file.name, timeout=1000)
+        self.assertEqual(Command.STATUS_COMPLETE, command.status)
+        self.assertEqual(Command.CODE_FAIL, command.return_code)
+        self.assertEqual('shazam', command.content)
+
+    def test_run_script_timeout(self):
+        # A script that does not set window.status within `timeout` times out.
+        browser = Browser()
+        script = "document.body.innerHTML = '<p>fnord</p>';"
+        command = browser.run_script(script, timeout=1000)
+        self.assertEqual(Command.STATUS_COMPLETE, command.status)
+        self.assertEqual(Command.CODE_FAIL, command.return_code)
+
+    def test_run_script_complete(self):
+        # A script that sets window.status with the status prefix completes.
+        browser = Browser()
+        script = (
+            "document.body.innerHTML = '<p>pting</p>';"
+            "window.status = '::::' + document.body.innerText;")
+        command = browser.run_script(script, timeout=1000)
+        self.assertEqual(Command.STATUS_COMPLETE, command.status)
+        self.assertEqual(Command.CODE_SUCCESS, command.return_code)
+        self.assertEqual('pting', command.content)
+
+    def test__on_console_message(self):
+        # The method returns the value of hide_console_messages.
+        # You should not see "** Message: console message:" on stderr
+        # when running this test.
+        browser = Browser(hide_console_messages=True)
+        script = (
+            "console.log('hello');"
+            "window.status = '::::goodbye;'")
+        browser.run_script(script, timeout=1000)
+        self.assertTrue(
+            browser._on_console_message(browser, 'message', 1, None, None))
diff --git a/setup.py b/setup.py
index 493ee5a..600dfb2 100644
--- a/setup.py
+++ b/setup.py
@@ -164,7 +164,6 @@ setup(
         'fixtures',
         'geoip2',
         'gunicorn[gthread]',
-        'html5browser',
         'importlib-resources',
         'ipaddress',
         'ipython',