launchpad-reviewers team mailing list archive
  
  - 
     launchpad-reviewers team launchpad-reviewers team
- 
    Mailing list archive
  
- 
    Message #02449
  
 [Merge] lp:~lifeless/launchpad/bugtask+index	into lp:launchpad
  
Robert Collins has proposed merging lp:~lifeless/launchpad/bugtask+index into lp:launchpad.
Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~lifeless/launchpad/bugtask+index/+merge/47969
fix bug 704446 - populate the bugmessage index so that we can make indexed_messages lazier or do range queries etc. I have an XXX in this diff saying we may need an index on index itself; my tests on qastaging show that we will eventually, but I'm going to land that as a separate patch (please ignore the XXX for now).
-- 
https://code.launchpad.net/~lifeless/launchpad/bugtask+index/+merge/47969
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~lifeless/launchpad/bugtask+index into lp:launchpad.
=== modified file 'database/schema/security.cfg'
--- database/schema/security.cfg	2011-01-26 22:29:22 +0000
+++ database/schema/security.cfg	2011-01-31 08:23:35 +0000
@@ -2020,6 +2020,7 @@
 public.bugsubscriptionfilterimportance  = SELECT
 public.bugsubscriptionfiltertag         = SELECT
 public.bugaffectsperson                 = SELECT
+public.bugmessage                       = SELECT, UPDATE
 public.bugnotification                  = SELECT, DELETE
 public.bugnotificationrecipientarchive  = SELECT
 public.bugtag                           = SELECT
=== modified file 'lib/lp/bugs/configure.zcml'
--- lib/lp/bugs/configure.zcml	2011-01-27 16:43:57 +0000
+++ lib/lp/bugs/configure.zcml	2011-01-31 08:23:35 +0000
@@ -796,6 +796,7 @@
             <require
                 permission="launchpad.Admin"
                 attributes="
+                    indexMessages
                     setCommentVisibility
                     setHeat"
                 set_attributes="heat_last_updated" />
@@ -836,7 +837,7 @@
             interface="lp.bugs.interfaces.bugmessage.IBugMessage"/>
         <require
             permission="launchpad.Admin"
-            set_attributes="remote_comment_id bugwatch visible"/>
+            set_attributes="remote_comment_id bugwatch visible index"/>
     </class>
 
     <!-- BugMessageSet -->
=== modified file 'lib/lp/bugs/interfaces/bug.py'
--- lib/lp/bugs/interfaces/bug.py	2011-01-27 16:43:57 +0000
+++ lib/lp/bugs/interfaces/bug.py	2011-01-31 08:23:35 +0000
@@ -459,6 +459,9 @@
     def unsubscribeFromDupes(person, unsubscribed_by):
         """Remove this person's subscription from all dupes of this bug."""
 
+    def indexMessages():
+        """Fill in the `BugMessage`.index column for this bugs messages."""
+
     def isSubscribed(person):
         """Is person subscribed to this bug?
 
=== modified file 'lib/lp/bugs/interfaces/bugmessage.py'
--- lib/lp/bugs/interfaces/bugmessage.py	2010-09-24 22:30:48 +0000
+++ lib/lp/bugs/interfaces/bugmessage.py	2011-01-31 08:23:35 +0000
@@ -40,6 +40,13 @@
     """A link between a bug and a message."""
 
     bug = Object(schema=IBug, title=u"The bug.")
+    # The index field is being populated in the DB; once complete it will be
+    # made required. Whether to make it readonly or not is dependent on UI
+    # considerations. If, once populated, it becomes read-write, we probably
+    # want to ensure that only actions like bug import or spam hiding can
+    # change it, rather than arbitrary API scripts.
+    index = Int(title=u'The comment number', required=False, readonly=False,
+        default=None)
     messageID = Int(title=u"The message id.", readonly=True)
     message = Object(schema=IMessage, title=u"The message.")
     bugwatch = Object(schema=IBugWatch,
=== modified file 'lib/lp/bugs/model/bug.py'
--- lib/lp/bugs/model/bug.py	2011-01-21 21:42:40 +0000
+++ lib/lp/bugs/model/bug.py	2011-01-31 08:23:35 +0000
@@ -432,12 +432,19 @@
         """See `IBug`."""
         return self.users_affected_with_dupes.count()
 
+    def indexMessages(self):
+        """See `IBug`."""
+        indexed_messages = self._indexed_messages(include_bugmessage=True)
+        for indexed_message, bugmessage in indexed_messages:
+            bugmessage.index = indexed_message.index
+
     @property
     def indexed_messages(self):
         """See `IMessageTarget`."""
         return self._indexed_messages(include_content=True)
 
-    def _indexed_messages(self, include_content=False, include_parents=True):
+    def _indexed_messages(self, include_content=False, include_parents=True,
+        include_bugmessage=False):
         """Get the bugs messages, indexed.
 
         :param include_content: If True retrieve the content for the messages
@@ -445,12 +452,14 @@
         :param include_parents: If True retrieve the object for parent
             messages too. If False the parent attribute will be *forced* to
             None to reduce database lookups.
+        :param include_bugmessage: If True returns tuples (message,
+            bugmessage). This exists for the indexMessages API.
         """
         # Make all messages be 'in' the main bugtask.
         inside = self.default_bugtask
         store = Store.of(self)
         message_by_id = {}
-        if include_parents:
+        if include_parents or include_bugmessage:
             to_messages = lambda rows: [row[0] for row in rows]
         else:
             to_messages = lambda rows: rows
@@ -495,18 +504,26 @@
         def index_message(row, index):
             # convert row to an IndexedMessage
             if include_parents:
-                message, parent = row
+                if include_bugmessage:
+                    message, parent, bugmessage = row
+                else:
+                    message, parent = row
                 if parent is not None:
                     # If there is an IndexedMessage available as parent, use
                     # that to reduce on-demand parent lookups.
                     parent = message_by_id.get(parent.id, parent)
             else:
-                message = row
+                if include_bugmessage:
+                    message, bugmessage = row
+                else:
+                    message = row
                 parent = None # parent attribute is not going to be accessed.
             result = IndexedMessage(message, inside, index, parent)
             # This message may be the parent for another: stash it to permit
             # use.
             message_by_id[message.id] = result
+            if include_bugmessage:
+                result = result, bugmessage
             return result
         # There is possibly some nicer way to do this in storm, but
         # this is a lot easier to figure out.
@@ -519,13 +536,19 @@
     parent_message.id in (
         select bugmessage.message from bugmessage where bugmessage.bug=%s)),
 BugMessage""" % sqlvalues(self.id))
+            lookup = Message, ParentMessage
+            if include_bugmessage:
+                lookup = lookup + (BugMessage,)
             results = store.using(tables).find(
-                (Message, ParentMessage),
+                lookup,
                 BugMessage.bugID == self.id,
                 BugMessage.messageID == Message.id,
                 )
         else:
-            results = store.find(Message,
+            lookup = Message
+            if include_bugmessage:
+                lookup = lookup, BugMessage
+            results = store.find(lookup,
                 BugMessage.bugID == self.id,
                 BugMessage.messageID == Message.id,
                 )
=== modified file 'lib/lp/bugs/model/bugmessage.py'
--- lib/lp/bugs/model/bugmessage.py	2010-10-06 18:53:53 +0000
+++ lib/lp/bugs/model/bugmessage.py	2011-01-31 08:23:35 +0000
@@ -11,6 +11,7 @@
 from sqlobject import (
     BoolCol,
     ForeignKey,
+    IntCol,
     StringCol,
     )
 from storm.store import Store
@@ -44,8 +45,9 @@
         notNull=False, default=None)
     remote_comment_id = StringCol(notNull=False, default=None)
     visible = BoolCol(notNull=True, default=True)
-    # -- Uncomment when deployed.
-    # index = IntCol()
+    # -- The index of the message is cached in the DB. Currently optional until
+    # we migrate all data.
+    index = IntCol(default=None)
 
 
 class BugMessageSet:
=== modified file 'lib/lp/bugs/tests/test_bug_messages.py'
--- lib/lp/bugs/tests/test_bug_messages.py	2010-10-04 19:50:45 +0000
+++ lib/lp/bugs/tests/test_bug_messages.py	2011-01-31 08:23:35 +0000
@@ -35,3 +35,8 @@
         # IIndexedMessage.
         for indexed_message in self.bug_2.indexed_messages:
             self.failUnlessEqual(None, indexed_message.parent)
+
+    def test_indexMessages_indexes_messages(self):
+        self.bug_2.bug_messages[0].index = None
+        self.bug_2.indexMessages()
+        self.assertEqual(0, self.bug_2.bug_messages[0].index)
=== modified file 'lib/lp/scripts/garbo.py'
--- lib/lp/scripts/garbo.py	2010-12-02 16:13:51 +0000
+++ lib/lp/scripts/garbo.py	2011-01-31 08:23:35 +0000
@@ -53,6 +53,7 @@
 from lp.bugs.interfaces.bug import IBugSet
 from lp.bugs.model.bug import Bug
 from lp.bugs.model.bugattachment import BugAttachment
+from lp.bugs.model.bugmessage import BugMessage
 from lp.bugs.model.bugnotification import BugNotification
 from lp.bugs.model.bugwatch import BugWatch
 from lp.bugs.scripts.checkwatches.scheduler import (
@@ -526,6 +527,32 @@
                 % chunk_size)
 
 
+class BugMessageIndexer(TunableLoop):
+    """Index `BugMessage` in the DB to allow smarter queries in future.
+
+    We find bugs with unindexed messages, and ask the bug to index them.
+    """
+    maximum_chunk_size = 10000
+
+    def _to_process(self):
+        # XXX: may need an index on BugMessage.index alone.
+        return IMasterStore(BugMessage).find(
+            BugMessage.bugID,
+            BugMessage.index==None).config(distinct=True)
+
+    def isDone(self):
+        return self._to_process().any() is None
+
+    def __call__(self, chunk_size):
+        chunk_size = int(chunk_size)
+        bugs_to_index = list(self._to_process()[:chunk_size])
+        bugs = IMasterStore(Bug).find(Bug, Bug.id.is_in(bugs_to_index))
+        for bug in bugs:
+            bug.indexMessages()
+        transaction.commit()
+        self.log.debug("Indexed %d bugs" % len(bugs_to_index))
+
+
 class BugNotificationPruner(TunableLoop):
     """Prune `BugNotificationRecipient` records no longer of interest.
 
@@ -867,6 +894,7 @@
         RevisionCachePruner,
         BugHeatUpdater,
         BugWatchScheduler,
+        BugMessageIndexer,
         ]
     experimental_tunable_loops = []
 
=== modified file 'lib/lp/scripts/tests/test_garbo.py'
--- lib/lp/scripts/tests/test_garbo.py	2010-12-20 03:21:03 +0000
+++ lib/lp/scripts/tests/test_garbo.py	2011-01-31 08:23:35 +0000
@@ -10,6 +10,7 @@
     datetime,
     timedelta,
     )
+import operator
 import time
 
 from pytz import UTC
@@ -449,6 +450,26 @@
             personset.getByName('test-unlinked-person-new'), None)
         self.assertIs(personset.getByName('test-unlinked-person-old'), None)
 
+    def test_BugMessage_indexer(self):
+        LaunchpadZopelessLayer.switchDbUser('testadmin')
+        # The garbo sets BugMessage.index - so create a bug with three
+        # messages: 1 indexed and two not indexed. The two should get indexed
+        # when the garbo job runs.
+        bug = self.factory.makeBug()
+        for _ in range(3):
+            self.factory.makeBugComment(bug)
+        messages = list(bug.bug_messages)
+        messages[0].index = 0
+        messages[1].index = None
+        messages[2].index = None
+        indexed_messages = bug.indexed_messages
+        self.runHourly()
+        index_getter = operator.attrgetter('index')
+        indexed_messages = sorted(indexed_messages, key=index_getter)
+        expected = map(index_getter, indexed_messages)
+        actual = map(index_getter, bug.bug_messages)
+        self.assertEqual(expected, actual)
+
     def test_BugNotificationPruner(self):
         # Create some sample data
         LaunchpadZopelessLayer.switchDbUser('testadmin')
Follow ups