← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~jameinel/maas/tag-lxml into lp:maas

 

John A Meinel has proposed merging lp:~jameinel/maas/tag-lxml into lp:maas.

Commit message:
Evaluate tags using LXML rather than using postgres's xpath* functions.

This gives us a little bit more control, and allows us to farm the work out into workers, rather than having to do everything inside the database (1 DB, N workers).
Also, it ends up cleaning up a fair amount of code, because we don't touch db transactions while evaluating the xpath content.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~jameinel/maas/tag-lxml/+merge/127427

This is a necessary step along the path to farming out asynchronous tag updates to workers, rather than having all the work done in the DB.

-- 
https://code.launchpad.net/~jameinel/maas/tag-lxml/+merge/127427
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~jameinel/maas/tag-lxml into lp:maas.
=== modified file 'src/maasserver/api.py'
--- src/maasserver/api.py	2012-10-02 04:52:08 +0000
+++ src/maasserver/api.py	2012-10-02 07:26:25 +0000
@@ -1344,7 +1344,13 @@
     def new(self, request):
         """Create a new `Tag`.
         """
-        return create_tag(request)
+        if not request.user.is_superuser:
+            raise PermissionDenied()
+        form = TagForm(request.data)
+        if form.is_valid():
+            return form.save()
+        else:
+            raise ValidationError(form.errors)
 
     @operation(idempotent=True)
     def list(self, request):
@@ -1357,23 +1363,6 @@
         return ('tags_handler', [])
 
 
-def create_tag(request):
-    """Service an http request to create a tag.
-
-    :param request: The http request for this node to be created.
-    :return: A `Tag`.
-    :rtype: :class:`maasserver.models.Tag`.
-    :raises: ValidationError
-    """
-    if not request.user.is_superuser:
-        raise PermissionDenied()
-    form = TagForm(request.data)
-    if form.is_valid():
-        return form.save()
-    else:
-        raise ValidationError(form.errors)
-
-
 class MAASHandler(OperationsHandler):
     """Manage the MAAS' itself."""
     create = read = update = delete = None

=== modified file 'src/maasserver/forms.py'
--- src/maasserver/forms.py	2012-09-28 07:31:54 +0000
+++ src/maasserver/forms.py	2012-10-02 07:26:25 +0000
@@ -51,6 +51,7 @@
     Form,
     ModelForm,
     )
+from lxml import etree
 from maasserver.config_forms import SKIP_CHECK_NAME
 from maasserver.enum import (
     ARCHITECTURE,
@@ -778,3 +779,12 @@
             'comment',
             'definition',
             )
+
+    def clean_definition(self):
+        definition = self.cleaned_data['definition']
+        try:
+            etree.XPath(definition)
+        except etree.XPathSyntaxError as e:
+            msg = 'Invalid xpath expression: %s' % (e,)
+            raise ValidationError({'definition': [msg]})
+        return definition

=== modified file 'src/maasserver/models/node.py'
--- src/maasserver/models/node.py	2012-10-01 19:16:26 +0000
+++ src/maasserver/models/node.py	2012-10-02 07:26:25 +0000
@@ -16,7 +16,6 @@
     "update_hardware_details",
     ]
 
-import contextlib
 import os
 from string import whitespace
 from uuid import uuid1
@@ -27,7 +26,6 @@
     PermissionDenied,
     ValidationError,
     )
-from django.db import connection
 from django.db.models import (
     BooleanField,
     CharField,
@@ -38,6 +36,7 @@
     Q,
     )
 from django.shortcuts import get_object_or_404
+from lxml import etree
 from maasserver import DefaultMeta
 from maasserver.enum import (
     ARCHITECTURE,
@@ -322,6 +321,11 @@
         return processed_nodes
 
 
+_xpath_processor_count = "count(//node[@id='core']/node[@class='processor'])"
+_xpath_memory_bytes = "//node[@id='memory']/size[@units='bytes']/text()"
+_mibibyte = 1024 * 1024
+
+
 def update_hardware_details(node, xmlbytes):
     """Set node hardware_details from lshw output and update related fields
 
@@ -334,31 +338,27 @@
     * Scalar returns from xpath() work in postgres 9.2 or later only.
     """
     node.hardware_details = xmlbytes
-    node.save()
-    with contextlib.closing(connection.cursor()) as cursor:
-        cursor.execute("SELECT"
-            " array_length(xpath(%s, hardware_details), 1) AS count,"
-            " (xpath(%s, hardware_details))[1]::text::bigint / 1048576 AS mem"
-            " FROM maasserver_node"
-            " WHERE id = %s",
-            [
-                "//node[@id='core']/node[@class='processor']",
-                "//node[@id='memory']/size[@units='bytes']/text()",
-                node.id,
-            ])
-        cpu_count, memory = cursor.fetchone()
-        node.cpu_count = cpu_count or 0
-        node.memory = memory or 0
-        for tag in Tag.objects.all():
-            cursor.execute(
-                "SELECT xpath_exists(%s, hardware_details)"
-                " FROM maasserver_node WHERE id = %s",
-                [tag.definition,  node.id])
-            has_tag, = cursor.fetchone()
-            if has_tag:
-                node.tags.add(tag)
-            else:
-                node.tags.remove(tag)
+    parser = etree.XMLParser(recover=True)
+    doc = etree.XML(node.hardware_details, parser)
+    # Same document, many queries: use XPathEvaluator.
+    evaluator = etree.XPathEvaluator(doc)
+    cpu_count = evaluator(_xpath_processor_count)
+    memory = evaluator(_xpath_memory_bytes)
+    if not memory or len(memory) != 1:
+        memory = 0
+    else:
+        try:
+            memory = int(memory[0]) / _mibibyte
+        except ValueError:
+            memory = 0
+    node.cpu_count = cpu_count or 0
+    node.memory = memory
+    for tag in Tag.objects.all():
+        has_tag = evaluator(tag.definition)
+        if has_tag:
+            node.tags.add(tag)
+        else:
+            node.tags.remove(tag)
     node.save()
 
 

=== modified file 'src/maasserver/models/tag.py'
--- src/maasserver/models/tag.py	2012-09-29 10:26:33 +0000
+++ src/maasserver/models/tag.py	2012-10-02 07:26:25 +0000
@@ -16,6 +16,7 @@
 
 from django.core.exceptions import (
     PermissionDenied,
+    ValidationError,
     )
 from django.core.validators import RegexValidator
 from django.db.models import (
@@ -25,6 +26,7 @@
     Q,
     )
 from django.shortcuts import get_object_or_404
+from lxml import etree
 from maasserver import DefaultMeta
 from maasserver.models.cleansave import CleanSave
 from maasserver.models.timestampedmodel import TimestampedModel
@@ -116,16 +118,21 @@
         """Find all nodes that match this tag, and update them."""
         # Local import to avoid circular reference
         from maasserver.models import Node
-        # First destroy the existing tags
+        # First make sure we have a valid definition
+        try:
+            # Many documents, one definition: use XPath.
+            xpath = etree.XPath(self.definition)
+        except etree.XPathSyntaxError as e:
+            msg = 'Invalid xpath expression: %s' % (e,)
+            raise ValidationError({'definition': [msg]})
+        # Now delete the existing tags
         self.node_set.clear()
-        # Now figure out what matches the new definition
-        for node in Node.objects.raw(
-                'SELECT id FROM maasserver_node'
-                ' WHERE xpath_exists(%s, hardware_details)',
-                [self.definition]):
-            # Is there an efficiency difference between doing
-            # 'node.tags.add(self)' and 'self.node_set.add(node)' ?
-            node.tags.add(self)
+        # And figure out what matches the new definition
+        parser = etree.XMLParser(recover=True)
+        for node in Node.objects.filter(hardware_details__isnull=False):
+            doc = etree.XML(node.hardware_details, parser)
+            if xpath(doc):
+                node.tags.add(self)
 
     def save(self, *args, **kwargs):
         super(Tag, self).save(*args, **kwargs)

=== modified file 'src/maasserver/tests/test_api.py'
--- src/maasserver/tests/test_api.py	2012-10-01 20:50:21 +0000
+++ src/maasserver/tests/test_api.py	2012-10-02 07:26:25 +0000
@@ -35,7 +35,6 @@
 from django.conf import settings
 from django.contrib.auth.models import AnonymousUser
 from django.core.urlresolvers import reverse
-from django.db.utils import DatabaseError
 from django.http import QueryDict
 from fixtures import Fixture
 from maasserver import api
@@ -2456,18 +2455,10 @@
             {'definition': 'invalid::tag'})
 
         self.assertEqual(httplib.BAD_REQUEST, response.status_code)
-        # Note: JAM 2012-09-26 we'd like to assert that the state in the DB is
-        #       properly aborted. However, the transactions are being handled
-        #       by TransactionMiddleware and are not hooked up in the test
-        #       suite. And the DB is currently in 'pending rollback' state, so
-        #       we cannot inspect it. So we test that we get a proper error
-        #       back, and test that the DB is in abort state. To test that
-        #       TransactionMiddleware is properly functioning needs a higher
-        #       level test.
-        self.assertRaises(DatabaseError, Tag.objects.all().count)
-        # tag = reload_object(tag)
-        # self.assertItemsEqual([tag.name], node.tag_names())
-        # self.assertEqual('/node/foo', tag.definition)
+        # The tag should not be modified
+        tag = reload_object(tag)
+        self.assertItemsEqual([tag.name], node.tag_names())
+        self.assertEqual('//child', tag.definition)
 
 
 class TestTagsAPI(APITestCase):

=== modified file 'src/maasserver/tests/test_tag.py'
--- src/maasserver/tests/test_tag.py	2012-09-29 10:26:33 +0000
+++ src/maasserver/tests/test_tag.py	2012-10-02 07:26:25 +0000
@@ -13,12 +13,9 @@
 __all__ = []
 
 from django.core.exceptions import ValidationError
-from django.db import transaction
-from django.db.utils import DatabaseError
 from maasserver.models import Tag
 from maasserver.testing.factory import factory
 from maasserver.testing.testcase import TestCase
-from maastesting.djangotestcase import TransactionTestCase
 
 
 class TagTest(TestCase):
@@ -127,26 +124,11 @@
         self.assertItemsEqual([node1, node2],
                               Tag.objects.get_nodes(tag.name, user2))
 
-
-class TestTagTransactions(TransactionTestCase):
-
     def test_rollsback_invalid_xpath(self):
-        @transaction.commit_manually
-        def setup():
-            node = factory.make_node()
-            node.set_hardware_details('<node><foo /></node>')
-            tag = factory.make_tag(definition='/node/foo')
-            self.assertItemsEqual([tag.name], node.tag_names())
-            transaction.commit()
-            return tag, node
-        tag, node = setup()
-
-        @transaction.commit_manually
-        def trigger_invalid():
-            tag.definition = 'invalid::tag'
-            self.assertRaises(DatabaseError, tag.save)
-            transaction.rollback()
-        # Because the definition is invalid, the db should not have been
-        # updated
-        trigger_invalid()
+        node = factory.make_node()
+        node.set_hardware_details('<node><foo /></node>')
+        tag = factory.make_tag(definition='/node/foo')
+        self.assertItemsEqual([tag.name], node.tag_names())
+        tag.definition = 'invalid::tag'
+        self.assertRaises(ValidationError, tag.save)
         self.assertItemsEqual([tag.name], node.tag_names())