← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~rvb/maas/bulk-load into lp:maas

 

Raphaël Badin has proposed merging lp:~rvb/maas/bulk-load into lp:maas.

Commit message:
Load the mac addresses related to a node from the cache even if .iterator() is used.  Preload the macaddresses and the tags related to a list of nodes when fetching the list of nodes via the api.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #1066775 in MAAS: "Main page slow to load with many nodes"
  https://bugs.launchpad.net/maas/+bug/1066775

For more details, see:
https://code.launchpad.net/~rvb/maas/bulk-load/+merge/130535

Load the related mac addresses in bulk. Preload the macaddresses and the tags related to a list of nodes when fetching the list of nodes via the api.

= Notes =

This took some time to figure out a clean way to fix that problem but I think this solution is quite all right and does not interfere too much with the rest of the code.

I tried using [tag.name for tag in self.tags.only('name')] in tag_names (to avoid loading the other fields) but I looks like the cache is not used in this case.

With that change in, a request to http://0.0.0.0:5240/api/1.0/nodes/?op=list with 8000 nodes now takes ~8s.  It probably could be improved but I suspect the bottleneck is the json serialization now.
-- 
https://code.launchpad.net/~rvb/maas/bulk-load/+merge/130535
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~rvb/maas/bulk-load into lp:maas.
=== modified file 'src/maasserver/api.py'
--- src/maasserver/api.py	2012-10-18 16:50:50 +0000
+++ src/maasserver/api.py	2012-10-19 12:07:26 +0000
@@ -87,7 +87,6 @@
 from functools import partial
 import httplib
 from inspect import getdoc
-import simplejson as json
 import sys
 from textwrap import dedent
 
@@ -175,6 +174,7 @@
 from piston.utils import rc
 from provisioningserver.enum import POWER_TYPE
 from provisioningserver.kernel_opts import KernelParameters
+import simplejson as json
 
 
 class OperationsResource(Resource):
@@ -874,6 +874,9 @@
             request.user, NODE_PERMISSION.VIEW, ids=match_ids)
         if match_macs is not None:
             nodes = nodes.filter(macaddress__mac_address__in=match_macs)
+        # Prefetch related macaddresses and tags.
+        nodes = nodes.prefetch_related('macaddress_set__node')
+        nodes = nodes.prefetch_related('tags')
         return nodes.order_by('id')
 
     @operation(idempotent=True)

=== modified file 'src/maasserver/models/macaddress.py'
--- src/maasserver/models/macaddress.py	2012-06-25 12:51:06 +0000
+++ src/maasserver/models/macaddress.py	2012-10-19 12:07:26 +0000
@@ -21,6 +21,7 @@
 from maasserver import DefaultMeta
 from maasserver.fields import MACAddressField
 from maasserver.models.cleansave import CleanSave
+from maasserver.models.managers import BulkManager
 from maasserver.models.timestampedmodel import TimestampedModel
 
 
@@ -38,6 +39,8 @@
     mac_address = MACAddressField(unique=True)
     node = ForeignKey('Node', editable=False)
 
+    objects = BulkManager()
+
     class Meta(DefaultMeta):
         verbose_name = "MAC address"
         verbose_name_plural = "MAC addresses"

=== added file 'src/maasserver/models/managers.py'
--- src/maasserver/models/managers.py	1970-01-01 00:00:00 +0000
+++ src/maasserver/models/managers.py	2012-10-19 12:07:26 +0000
@@ -0,0 +1,32 @@
+# Copyright 2012 Canonical Ltd.  This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+"""Custom MAAS manager classes."""
+
+from __future__ import (
+    absolute_import,
+    print_function,
+    unicode_literals,
+    )
+
+__metaclass__ = type
+__all__ = [
+    'BulkManager',
+    ]
+
+
+from django.db.models import Manager
+
+
+class BulkManager(Manager):
+    """A Manager which loads objects from the cache if it's populated.
+
+    Even when iterator() is explicitely called (which happens in piston when
+    related collections are fetched), this manager will fetch objects in bulk
+    if the cache is populated (i.e. if prefetch_related was used).
+    """
+
+    def iterator(self):
+        return self.all()
+
+

=== modified file 'src/maasserver/models/node.py'
--- src/maasserver/models/node.py	2012-10-15 08:01:19 +0000
+++ src/maasserver/models/node.py	2012-10-19 12:07:26 +0000
@@ -492,7 +492,9 @@
             return self.system_id
 
     def tag_names(self):
-        return self.tags.values_list('name', flat=True)
+        # We don't use self.tags.values_list here because this does not
+        # take advantage of the cache.
+        return [tag.name for tag in self.tags.all()]
 
     def clean_status(self):
         """Check a node's status transition against the node-status FSM."""

=== modified file 'src/maasserver/tests/models.py'
--- src/maasserver/tests/models.py	2012-09-18 14:37:28 +0000
+++ src/maasserver/tests/models.py	2012-10-19 12:07:26 +0000
@@ -17,12 +17,14 @@
 
 from django.db.models import (
     CharField,
+    ForeignKey,
     Model,
     )
 from maasserver.fields import (
     JSONObjectField,
     XMLField,
     )
+from maasserver.models.managers import BulkManager
 from maasserver.models.timestampedmodel import TimestampedModel
 
 
@@ -53,3 +55,13 @@
 class FieldChangeTestModel(Model):
     name1 = CharField(max_length=255, unique=False)
     name2 = CharField(max_length=255, unique=False)
+
+
+class BulkManagerParentTestModel(Model):
+    pass
+
+
+class BulkManagerTestModel(Model):
+    parent = ForeignKey('BulkManagerParentTestModel', editable=False)
+
+    objects = BulkManager()

=== modified file 'src/maasserver/tests/test_api.py'
--- src/maasserver/tests/test_api.py	2012-10-19 07:56:19 +0000
+++ src/maasserver/tests/test_api.py	2012-10-19 12:07:26 +0000
@@ -1657,6 +1657,30 @@
             [node1.system_id, node2.system_id],
             extract_system_ids(parsed_result))
 
+    def create_nodes(self, nodegroup, nb):
+        [factory.make_node(nodegroup=nodegroup, mac=True)
+            for i in range(nb)]
+
+    def test_GET_list_nodes_issues_constant_number_of_queries(self):
+        nodegroup = factory.make_node_group()
+        self.create_nodes(nodegroup, 10)
+        num_queries1, response1 = self.getNumQueries(
+            self.client.get, self.get_uri('nodes/'), {'op': 'list'})
+        self.create_nodes(nodegroup, 10)
+        num_queries2, response2 = self.getNumQueries(
+            self.client.get, self.get_uri('nodes/'), {'op': 'list'})
+        # Make sure the responses are ok as it's not useful to compare the
+        # number of queries if they are not.
+        self.assertEqual(
+            [httplib.OK, httplib.OK, 10, 20],
+            [
+                response1.status_code,
+                response2.status_code,
+                len(extract_system_ids(json.loads(response1.content))),
+                len(extract_system_ids(json.loads(response2.content))),
+            ])
+        self.assertEqual(num_queries1, num_queries2)
+
     def test_GET_list_without_nodes_returns_empty_list(self):
         # If there are no nodes to list, the "list" op still works but
         # returns an empty list.

=== added file 'src/maasserver/tests/test_managers.py'
--- src/maasserver/tests/test_managers.py	1970-01-01 00:00:00 +0000
+++ src/maasserver/tests/test_managers.py	2012-10-19 12:07:26 +0000
@@ -0,0 +1,42 @@
+# Copyright 2012 Canonical Ltd.  This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+"""Test maasserver model managers."""
+
+from __future__ import (
+    absolute_import,
+    print_function,
+    unicode_literals,
+    )
+
+__metaclass__ = type
+__all__ = []
+
+from maasserver.testing.testcase import TestModelTestCase
+from maasserver.tests.models import (
+    BulkManagerParentTestModel, BulkManagerTestModel)
+
+
+class BulkManagerTest(TestModelTestCase):
+
+    app = 'maasserver.tests'
+
+    def test_manager_iterator_uses_cache(self):
+        parents = set()
+        for i in range(3):
+            parents.add(BulkManagerParentTestModel.objects.create())
+        for i in range(10):
+            for parent in parents:
+                BulkManagerTestModel.objects.create(parent=parent)
+        parents = BulkManagerParentTestModel.objects.all().prefetch_related(
+            'bulkmanagertestmodel_set')
+        # Only two queries are used to fetch all the objects:
+        # One to fetch the parents, one to fetch the childrens (the query from
+        # the prefetch_related statement).
+        # Even if we call iterator() on the related objects, the cache is
+        # used because BulkManagerTestModel has a manager based on
+        # BulkManager.
+        self.assertNumQueries(
+            2,
+            lambda: [list(parent.bulkmanagertestmodel_set.iterator())
+                for parent in parents])