← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~cjwatson/launchpad/optimise-git-ref-scan into lp:launchpad

 

Colin Watson has proposed merging lp:~cjwatson/launchpad/optimise-git-ref-scan into lp:launchpad.

Commit message:
Reduce the number of GitRef columns fetched by ref scan jobs.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/optimise-git-ref-scan/+merge/359171

GitRef rows can be pretty wide due to commit_message, and we don't need the whole thing here.

I'm sure there's more to do here, since there are repositories with more than 100000 refs, but this may help with memory exhaustion issues.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/optimise-git-ref-scan into lp:launchpad.
=== modified file 'lib/lp/code/model/gitrepository.py'
--- lib/lp/code/model/gitrepository.py	2018-11-09 22:06:43 +0000
+++ lib/lp/code/model/gitrepository.py	2018-11-22 07:23:44 +0000
@@ -679,20 +679,27 @@
                 if logger is not None:
                     logger.warning(
                         "Unconvertible ref %s %s: %s" % (path, info, e))
-        current_refs = {ref.path: ref for ref in self.refs}
+        # GitRef rows can be large (especially commit_message), and we don't
+        # need the whole thing.
+        current_refs = {
+            ref[0]: ref[1:]
+            for ref in Store.of(self).find(
+                (GitRef.path, GitRef.commit_sha1, GitRef.object_type,
+                 Or(
+                     GitRef.author_id != None,
+                     GitRef.author_date != None,
+                     GitRef.committer_id != None,
+                     GitRef.committer_date != None,
+                     GitRef.commit_message != None)),
+                GitRef.repository_id == self.id)}
         refs_to_upsert = {}
         for path, info in new_refs.items():
             current_ref = current_refs.get(path)
             if (current_ref is None or
-                info["sha1"] != current_ref.commit_sha1 or
-                info["type"] != current_ref.object_type):
+                    info["sha1"] != current_ref[0] or
+                    info["type"] != current_ref[1]):
                 refs_to_upsert[path] = info
-            elif (info["type"] == GitObjectType.COMMIT and
-                  (current_ref.author_id is None or
-                   current_ref.author_date is None or
-                   current_ref.committer_id is None or
-                   current_ref.committer_date is None or
-                   current_ref.commit_message is None)):
+            elif info["type"] == GitObjectType.COMMIT and not current_ref[2]:
                 # Only request detailed commit metadata for refs that point
                 # to commits.
                 refs_to_upsert[path] = info

=== modified file 'lib/lp/code/model/tests/test_gitrepository.py'
--- lib/lp/code/model/tests/test_gitrepository.py	2018-11-09 22:06:43 +0000
+++ lib/lp/code/model/tests/test_gitrepository.py	2018-11-22 07:23:44 +0000
@@ -1453,6 +1453,41 @@
             }))
         self.assertEqual(({}, set()), repository.planRefChanges("dummy"))
 
+    def test_planRefChanges_includes_unfetched_commits(self):
+        # planRefChanges plans updates to refs pointing to commits for which
+        # we haven't yet fetched detailed metadata.
+        repository = self.factory.makeGitRepository()
+        paths = ("refs/heads/master", "refs/heads/foo")
+        self.factory.makeGitRefs(repository=repository, paths=paths)
+        author_addr = (
+            removeSecurityProxy(repository.owner).preferredemail.email)
+        [author] = getUtility(IRevisionSet).acquireRevisionAuthors(
+            [author_addr]).values()
+        naked_master = removeSecurityProxy(
+            repository.getRefByPath("refs/heads/master"))
+        naked_master.author_id = naked_master.committer_id = author.id
+        naked_master.author_date = naked_master.committer_date = (
+            datetime.now(pytz.UTC))
+        naked_master.commit_message = "message"
+        self.useFixture(GitHostingFixture(refs={
+            path: {
+                "object": {
+                    "sha1": repository.getRefByPath(path).commit_sha1,
+                    "type": "commit",
+                    },
+                }
+            for path in paths}))
+        refs_to_upsert, refs_to_remove = repository.planRefChanges("dummy")
+
+        expected_upsert = {
+            "refs/heads/foo": {
+                "sha1": repository.getRefByPath("refs/heads/foo").commit_sha1,
+                "type": GitObjectType.COMMIT,
+                },
+            }
+        self.assertEqual(expected_upsert, refs_to_upsert)
+        self.assertEqual(set(), refs_to_remove)
+
     def test_fetchRefCommits(self):
         # fetchRefCommits fetches detailed tip commit metadata for the
         # requested refs.


Follow ups