launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #22397
[Merge] lp:~cjwatson/launchpad/better-iter-chunks into lp:launchpad
Colin Watson has proposed merging lp:~cjwatson/launchpad/better-iter-chunks into lp:launchpad.
Commit message:
Rename iter_list_chunks to iter_chunks and make it work on iterables.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/better-iter-chunks/+merge/343418
This saves us from having to copy quite so many large lists around in bzrsync. I doubt it will make a huge difference for very deep histories, but every little helps.
--
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/better-iter-chunks into lp:launchpad.
=== modified file 'lib/lp/codehosting/scanner/bzrsync.py'
--- lib/lp/codehosting/scanner/bzrsync.py 2016-06-20 20:32:36 +0000
+++ lib/lp/codehosting/scanner/bzrsync.py 2018-04-17 10:06:43 +0000
@@ -1,6 +1,6 @@
#!/usr/bin/python
#
-# Copyright 2009-2016 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2018 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Import version control metadata from a Bazaar branch into the database."""
@@ -19,6 +19,7 @@
from bzrlib.graph import DictParentsProvider
from bzrlib.revision import NULL_REVISION
import pytz
+import six
from storm.locals import Store
import transaction
from zope.component import getUtility
@@ -35,7 +36,7 @@
from lp.codehosting.scanner import events
from lp.services.config import config
from lp.services.features import getFeatureFlag
-from lp.services.utils import iter_list_chunks
+from lp.services.utils import iter_chunks
from lp.services.webhooks.interfaces import IWebhookSet
from lp.translations.interfaces.translationtemplatesbuild import (
ITranslationTemplatesBuildSource,
@@ -104,7 +105,7 @@
new_db_revs = (
new_ancestry - getUtility(IRevisionSet).onlyPresent(new_ancestry))
self.logger.info("Adding %s new revisions.", len(new_db_revs))
- for revids in iter_list_chunks(list(new_db_revs), 10000):
+ for revids in iter_chunks(new_db_revs, 10000):
revisions = self.getBazaarRevisions(bzr_branch, revids)
self.syncRevisions(bzr_branch, revisions, revids_to_insert)
self.deleteBranchRevisions(branchrevisions_to_delete)
@@ -297,8 +298,8 @@
"""Insert a batch of BranchRevision rows."""
self.logger.info("Inserting %d branchrevision records.",
len(revids_to_insert))
- revid_seq_pairs = revids_to_insert.items()
- for revid_seq_pair_chunk in iter_list_chunks(revid_seq_pairs, 10000):
+ revid_seq_pairs = six.iteritems(revids_to_insert)
+ for revid_seq_pair_chunk in iter_chunks(revid_seq_pairs, 10000):
self.db_branch.createBranchRevisionFromIDs(revid_seq_pair_chunk)
def updateBranchStatus(self, bzr_history):
=== modified file 'lib/lp/services/tests/test_utils.py'
--- lib/lp/services/tests/test_utils.py 2018-04-10 21:47:06 +0000
+++ lib/lp/services/tests/test_utils.py 2018-04-17 10:06:43 +0000
@@ -30,6 +30,7 @@
decorate_with,
docstring_dedent,
file_exists,
+ iter_chunks,
iter_split,
load_bz2_pickle,
obfuscate_structure,
@@ -141,6 +142,32 @@
list(iter_split('one/two/three', '/')))
+class TestIterChunks(TestCase):
+ """Tests for iter_chunks."""
+
+ def test_empty(self):
+ self.assertEqual([], list(iter_chunks([], 1)))
+
+ def test_sequence(self):
+ self.assertEqual(
+ [('a', 'b'), ('c', 'd'), ('e',)], list(iter_chunks('abcde', 2)))
+
+ def test_iterable(self):
+ self.assertEqual(
+ [('a', 'b'), ('c', 'd'), ('e',)],
+ list(iter_chunks(iter('abcde'), 2)))
+
+ def test_size_divides_exactly(self):
+ self.assertEqual(
+ [(1, 2, 3), (4, 5, 6), (7, 8, 9)],
+ list(iter_chunks(range(1, 10), 3)))
+
+ def test_size_does_not_divide_exactly(self):
+ self.assertEqual(
+ [(1, 2, 3), (4, 5, 6), (7, 8)],
+ list(iter_chunks(range(1, 9), 3)))
+
+
class TestCachingIterator(TestCase):
"""Tests for CachingIterator."""
=== modified file 'lib/lp/services/utils.py'
--- lib/lp/services/utils.py 2018-04-10 21:47:06 +0000
+++ lib/lp/services/utils.py 2018-04-17 10:06:43 +0000
@@ -16,7 +16,7 @@
'decorate_with',
'docstring_dedent',
'file_exists',
- 'iter_list_chunks',
+ 'iter_chunks',
'iter_split',
'load_bz2_pickle',
'obfuscate_email',
@@ -34,7 +34,10 @@
import bz2
import cPickle as pickle
from datetime import datetime
-from itertools import tee
+from itertools import (
+ islice,
+ tee,
+ )
import os
import re
import string
@@ -139,13 +142,17 @@
yield first, string[len(first):]
-def iter_list_chunks(a_list, size):
- """Iterate over `a_list` in chunks of size `size`.
+def iter_chunks(iterable, size):
+ """Iterate over `iterable` in chunks of size `size`.
I'm amazed this isn't in itertools (mwhudson).
"""
- for i in range(0, len(a_list), size):
- yield a_list[i:i + size]
+ iterable = iter(iterable)
+ while True:
+ chunk = tuple(islice(iterable, size))
+ if not chunk:
+ break
+ yield chunk
def value_string(item):
Follow ups