← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~cjwatson/launchpad/better-iter-chunks into lp:launchpad

 

Colin Watson has proposed merging lp:~cjwatson/launchpad/better-iter-chunks into lp:launchpad.

Commit message:
Rename iter_list_chunks to iter_chunks and make it work on iterables.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/better-iter-chunks/+merge/343418

This saves us from having to copy quite so many large lists around in bzrsync.  I doubt it will make a huge difference for very deep histories, but every little helps.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/better-iter-chunks into lp:launchpad.
=== modified file 'lib/lp/codehosting/scanner/bzrsync.py'
--- lib/lp/codehosting/scanner/bzrsync.py	2016-06-20 20:32:36 +0000
+++ lib/lp/codehosting/scanner/bzrsync.py	2018-04-17 10:06:43 +0000
@@ -1,6 +1,6 @@
 #!/usr/bin/python
 #
-# Copyright 2009-2016 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2018 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Import version control metadata from a Bazaar branch into the database."""
@@ -19,6 +19,7 @@
 from bzrlib.graph import DictParentsProvider
 from bzrlib.revision import NULL_REVISION
 import pytz
+import six
 from storm.locals import Store
 import transaction
 from zope.component import getUtility
@@ -35,7 +36,7 @@
 from lp.codehosting.scanner import events
 from lp.services.config import config
 from lp.services.features import getFeatureFlag
-from lp.services.utils import iter_list_chunks
+from lp.services.utils import iter_chunks
 from lp.services.webhooks.interfaces import IWebhookSet
 from lp.translations.interfaces.translationtemplatesbuild import (
     ITranslationTemplatesBuildSource,
@@ -104,7 +105,7 @@
         new_db_revs = (
             new_ancestry - getUtility(IRevisionSet).onlyPresent(new_ancestry))
         self.logger.info("Adding %s new revisions.", len(new_db_revs))
-        for revids in iter_list_chunks(list(new_db_revs), 10000):
+        for revids in iter_chunks(new_db_revs, 10000):
             revisions = self.getBazaarRevisions(bzr_branch, revids)
             self.syncRevisions(bzr_branch, revisions, revids_to_insert)
         self.deleteBranchRevisions(branchrevisions_to_delete)
@@ -297,8 +298,8 @@
         """Insert a batch of BranchRevision rows."""
         self.logger.info("Inserting %d branchrevision records.",
             len(revids_to_insert))
-        revid_seq_pairs = revids_to_insert.items()
-        for revid_seq_pair_chunk in iter_list_chunks(revid_seq_pairs, 10000):
+        revid_seq_pairs = six.iteritems(revids_to_insert)
+        for revid_seq_pair_chunk in iter_chunks(revid_seq_pairs, 10000):
             self.db_branch.createBranchRevisionFromIDs(revid_seq_pair_chunk)
 
     def updateBranchStatus(self, bzr_history):

=== modified file 'lib/lp/services/tests/test_utils.py'
--- lib/lp/services/tests/test_utils.py	2018-04-10 21:47:06 +0000
+++ lib/lp/services/tests/test_utils.py	2018-04-17 10:06:43 +0000
@@ -30,6 +30,7 @@
     decorate_with,
     docstring_dedent,
     file_exists,
+    iter_chunks,
     iter_split,
     load_bz2_pickle,
     obfuscate_structure,
@@ -141,6 +142,32 @@
             list(iter_split('one/two/three', '/')))
 
 
+class TestIterChunks(TestCase):
+    """Tests for iter_chunks."""
+
+    def test_empty(self):
+        self.assertEqual([], list(iter_chunks([], 1)))
+
+    def test_sequence(self):
+        self.assertEqual(
+            [('a', 'b'), ('c', 'd'), ('e',)], list(iter_chunks('abcde', 2)))
+
+    def test_iterable(self):
+        self.assertEqual(
+            [('a', 'b'), ('c', 'd'), ('e',)],
+            list(iter_chunks(iter('abcde'), 2)))
+
+    def test_size_divides_exactly(self):
+        self.assertEqual(
+            [(1, 2, 3), (4, 5, 6), (7, 8, 9)],
+            list(iter_chunks(range(1, 10), 3)))
+
+    def test_size_does_not_divide_exactly(self):
+        self.assertEqual(
+            [(1, 2, 3), (4, 5, 6), (7, 8)],
+            list(iter_chunks(range(1, 9), 3)))
+
+
 class TestCachingIterator(TestCase):
     """Tests for CachingIterator."""
 

=== modified file 'lib/lp/services/utils.py'
--- lib/lp/services/utils.py	2018-04-10 21:47:06 +0000
+++ lib/lp/services/utils.py	2018-04-17 10:06:43 +0000
@@ -16,7 +16,7 @@
     'decorate_with',
     'docstring_dedent',
     'file_exists',
-    'iter_list_chunks',
+    'iter_chunks',
     'iter_split',
     'load_bz2_pickle',
     'obfuscate_email',
@@ -34,7 +34,10 @@
 import bz2
 import cPickle as pickle
 from datetime import datetime
-from itertools import tee
+from itertools import (
+    islice,
+    tee,
+    )
 import os
 import re
 import string
@@ -139,13 +142,17 @@
         yield first, string[len(first):]
 
 
-def iter_list_chunks(a_list, size):
-    """Iterate over `a_list` in chunks of size `size`.
+def iter_chunks(iterable, size):
+    """Iterate over `iterable` in chunks of size `size`.
 
     I'm amazed this isn't in itertools (mwhudson).
     """
-    for i in range(0, len(a_list), size):
-        yield a_list[i:i + size]
+    iterable = iter(iterable)
+    while True:
+        chunk = tuple(islice(iterable, size))
+        if not chunk:
+            break
+        yield chunk
 
 
 def value_string(item):


Follow ups