← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~cjwatson/launchpad:handle-invalid-git-commit-addresses into launchpad:master

 

Colin Watson has proposed merging ~cjwatson/launchpad:handle-invalid-git-commit-addresses into launchpad:master.

Commit message:
Tolerate git commits with invalid author/committer emails

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #1958281 in Launchpad itself: "Importing the snapd git repo fails"
  https://bugs.launchpad.net/launchpad/+bug/1958281

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/414314

`email.utils.formataddr` requires email addresses to be ASCII, per the email RFCs, and raises `UnicodeEncodeError` if they aren't.  However, by the time a commit gets this far, we need to do something with it (it may be an import from a repository hosted somewhere else, in which case we wouldn't even theoretically be able to reject the bad metadata on push).  Leave the author or committer unset in this case and ensure that we can still render the commit properly when we need to.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:handle-invalid-git-commit-addresses into launchpad:master.
diff --git a/lib/lp/code/browser/tests/test_gitref.py b/lib/lp/code/browser/tests/test_gitref.py
index 75b5012..8af99ca 100644
--- a/lib/lp/code/browser/tests/test_gitref.py
+++ b/lib/lp/code/browser/tests/test_gitref.py
@@ -630,6 +630,34 @@ class TestGitRefView(BrowserTestCase):
         self.assertThat(
             contents, Not(soupmatchers.HTMLContains(MissingCommitsNote())))
 
+    def test_recent_commits_with_invalid_author_email(self):
+        # If an author's email address is syntactically invalid, then we
+        # ignore the authorship information for that commit and do our best
+        # to render what we have.
+        [ref] = self.factory.makeGitRefs(paths=["refs/heads/branch"])
+        log = self.makeCommitLog()
+        log[4]["author"]["email"] = "“%s”" % log[4]["author"]["email"]
+        self.hosting_fixture.getLog.result = list(reversed(log))
+        self.scanRef(ref, log[-1])
+        view = create_initialized_view(ref, "+index")
+        contents = view()
+        expected_texts = ["%.7s...\non 2015-01-05" % log[4]["sha1"]]
+        expected_texts.extend(reversed([
+            "%.7s...\nby\n%s\non 2015-01-%02d" % (
+                log[i]["sha1"], log[i]["author"]["name"], i + 1)
+            for i in range(4)]))
+        details = find_tags_by_class(contents, "commit-details")
+        self.assertEqual(
+            expected_texts, [extract_text(detail) for detail in details])
+        expected_urls = list(reversed([
+            "https://git.launchpad.test/%s/commit/?id=%s"; % (
+                ref.repository.shortened_path, log[i]["sha1"])
+            for i in range(5)]))
+        self.assertEqual(
+            expected_urls, [detail.a["href"] for detail in details])
+        self.assertThat(
+            contents, Not(soupmatchers.HTMLContains(MissingCommitsNote())))
+
     def test_show_merge_link_for_personal_repo(self):
         person = self.factory.makePerson()
         repo = self.factory.makeGitRepository(
diff --git a/lib/lp/code/model/gitrepository.py b/lib/lp/code/model/gitrepository.py
index fca7dfa..8a3b08c 100644
--- a/lib/lp/code/model/gitrepository.py
+++ b/lib/lp/code/model/gitrepository.py
@@ -258,20 +258,34 @@ def parse_git_commits(commits):
                 info["author_date"] = datetime.fromtimestamp(
                     author["time"], tz=pytz.UTC)
             if "name" in author and "email" in author:
-                author_addr = email.utils.formataddr(
-                    (author["name"], author["email"]))
-                info["author_addr"] = author_addr
-                authors_to_acquire.append(author_addr)
+                try:
+                    author_addr = email.utils.formataddr(
+                        (author["name"], author["email"]))
+                except UnicodeEncodeError:
+                    # Addresses must be ASCII; formataddr raises
+                    # UnicodeEncodeError if they aren't.  Just skip the
+                    # author in that case.
+                    pass
+                else:
+                    info["author_addr"] = author_addr
+                    authors_to_acquire.append(author_addr)
         committer = commit.get("committer")
         if committer is not None:
             if "time" in committer:
                 info["committer_date"] = datetime.fromtimestamp(
                     committer["time"], tz=pytz.UTC)
             if "name" in committer and "email" in committer:
-                committer_addr = email.utils.formataddr(
-                    (committer["name"], committer["email"]))
-                info["committer_addr"] = committer_addr
-                committers_to_acquire.append(committer_addr)
+                try:
+                    committer_addr = email.utils.formataddr(
+                        (committer["name"], committer["email"]))
+                except UnicodeEncodeError:
+                    # Addresses must be ASCII; formataddr raises
+                    # UnicodeEncodeError if they aren't.  Just skip the
+                    # committer in that case.
+                    pass
+                else:
+                    info["committer_addr"] = committer_addr
+                    committers_to_acquire.append(committer_addr)
         if "message" in commit:
             info["commit_message"] = commit["message"]
         parsed[commit["sha1"]] = info
diff --git a/lib/lp/code/model/tests/test_gitrepository.py b/lib/lp/code/model/tests/test_gitrepository.py
index f78bf5d..39a46d5 100644
--- a/lib/lp/code/model/tests/test_gitrepository.py
+++ b/lib/lp/code/model/tests/test_gitrepository.py
@@ -126,6 +126,7 @@ from lp.code.model.gitrepository import (
     DeletionCallable,
     DeletionOperation,
     GitRepository,
+    parse_git_commits,
     REVISION_STATUS_REPORT_ALLOW_CREATE,
     )
 from lp.code.tests.helpers import GitHostingFixture
@@ -201,6 +202,124 @@ from lp.xmlrpc import faults
 from lp.xmlrpc.interfaces import IPrivateApplication
 
 
+class TestParseGitCommits(TestCaseWithFactory):
+
+    layer = ZopelessDatabaseLayer
+
+    def test_valid(self):
+        master_sha1 = hashlib.sha1(b"refs/heads/master").hexdigest()
+        author = self.factory.makePerson()
+        with person_logged_in(author):
+            author_email = author.preferredemail.email
+        author_date = datetime(2015, 1, 1, tzinfo=pytz.UTC)
+        committer_date = datetime(2015, 1, 2, tzinfo=pytz.UTC)
+        commits = [
+            {
+                "sha1": master_sha1,
+                "message": "tip of master",
+                "author": {
+                    "name": author.displayname,
+                    "email": author_email,
+                    "time": int(seconds_since_epoch(author_date)),
+                    },
+                "committer": {
+                    "name": "New Person",
+                    "email": "new-person@xxxxxxxxxxx",
+                    "time": int(seconds_since_epoch(committer_date)),
+                    },
+                },
+            ]
+        parsed_commits = parse_git_commits(commits)
+        expected_author_addr = "%s <%s>" % (author.displayname, author_email)
+        [expected_author] = getUtility(IRevisionSet).acquireRevisionAuthors(
+            [expected_author_addr]).values()
+        expected_committer_addr = "New Person <new-person@xxxxxxxxxxx>"
+        [expected_committer] = getUtility(IRevisionSet).acquireRevisionAuthors(
+            ["New Person <new-person@xxxxxxxxxxx>"]).values()
+        self.assertEqual(
+            {
+                master_sha1: {
+                    "sha1": master_sha1,
+                    "author": expected_author,
+                    "author_addr": expected_author_addr,
+                    "author_date": author_date,
+                    "committer": expected_committer,
+                    "committer_addr": expected_committer_addr,
+                    "committer_date": committer_date,
+                    "commit_message": "tip of master",
+                    },
+                },
+            parsed_commits)
+
+    def test_invalid_author_address(self):
+        master_sha1 = hashlib.sha1(b"refs/heads/master").hexdigest()
+        author_date = datetime(2022, 1, 1, tzinfo=pytz.UTC)
+        commits = [
+            {
+                "sha1": master_sha1,
+                "author": {
+                    "name": "New Person",
+                    "email": "“accidental-quotes@xxxxxxxxxxx”",
+                    "time": int(seconds_since_epoch(author_date)),
+                    },
+                "committer": {
+                    "name": "New Person",
+                    "email": "accidental-quotes@xxxxxxxxxxx",
+                    "time": int(seconds_since_epoch(author_date)),
+                    },
+                }
+            ]
+        parsed_commits = parse_git_commits(commits)
+        [expected_author] = getUtility(IRevisionSet).acquireRevisionAuthors(
+            ["New Person <accidental-quotes@xxxxxxxxxxx>"]).values()
+        self.assertEqual(
+            {
+                master_sha1: {
+                    "sha1": master_sha1,
+                    "author_date": author_date,
+                    "committer": expected_author,
+                    "committer_addr": (
+                        "New Person <accidental-quotes@xxxxxxxxxxx>"),
+                    "committer_date": author_date,
+                    },
+                },
+            parsed_commits)
+
+    def test_invalid_committer_address(self):
+        master_sha1 = hashlib.sha1(b"refs/heads/master").hexdigest()
+        author_date = datetime(2022, 1, 1, tzinfo=pytz.UTC)
+        commits = [
+            {
+                "sha1": master_sha1,
+                "author": {
+                    "name": "New Person",
+                    "email": "accidental-quotes@xxxxxxxxxxx",
+                    "time": int(seconds_since_epoch(author_date)),
+                    },
+                "committer": {
+                    "name": "New Person",
+                    "email": "“accidental-quotes@xxxxxxxxxxx”",
+                    "time": int(seconds_since_epoch(author_date)),
+                    },
+                }
+            ]
+        parsed_commits = parse_git_commits(commits)
+        [expected_author] = getUtility(IRevisionSet).acquireRevisionAuthors(
+            ["New Person <accidental-quotes@xxxxxxxxxxx>"]).values()
+        self.assertEqual(
+            {
+                master_sha1: {
+                    "sha1": master_sha1,
+                    "author": expected_author,
+                    "author_addr": (
+                        "New Person <accidental-quotes@xxxxxxxxxxx>"),
+                    "author_date": author_date,
+                    "committer_date": author_date,
+                    },
+                },
+            parsed_commits)
+
+
 class TestGitRepository(TestCaseWithFactory):
     """Test basic properties about Launchpad database Git repositories."""
 
diff --git a/lib/lp/code/templates/git-macros.pt b/lib/lp/code/templates/git-macros.pt
index 8679590..a990c18 100644
--- a/lib/lp/code/templates/git-macros.pt
+++ b/lib/lp/code/templates/git-macros.pt
@@ -200,17 +200,19 @@
   <dt class="commit-details"
       tal:define="
         sha1 python:commit_info['sha1'];
-        author python:commit_info['author'];
+        author python:commit_info.get('author');
         author_date python:commit_info['author_date']">
     <a tal:attributes="href python: ref.getCodebrowseUrlForRevision(sha1)"
        tal:content="sha1/fmt:shorten/10" />
-    by
-    <tal:known-person condition="author/person">
-      <tal:person-link replace="structure author/person/fmt:link" />
-    </tal:known-person>
-    <tal:unknown-person condition="not: author/person">
-      <strong tal:content="author/name/fmt:obfuscate-email" />
-    </tal:unknown-person>
+    <tal:has-author condition="author">
+      by
+      <tal:known-person condition="author/person">
+        <tal:person-link replace="structure author/person/fmt:link" />
+      </tal:known-person>
+      <tal:unknown-person condition="not: author/person">
+        <strong tal:content="author/name/fmt:obfuscate-email" />
+      </tal:unknown-person>
+    </tal:has-author>
     <tal:author-date replace="structure author_date/fmt:displaydatetitle" />
   </dt>
   <dd class="subordinate commit-message"
diff --git a/lib/lp/code/templates/gitref-listing.pt b/lib/lp/code/templates/gitref-listing.pt
index d9f3260..4eaa8e9 100644
--- a/lib/lp/code/templates/gitref-listing.pt
+++ b/lib/lp/code/templates/gitref-listing.pt
@@ -59,9 +59,11 @@ function hide_git_commit(id) {
                  tal:attributes="id string:git-ref-${repeat/ref/index};
                                  onmouseover string:hide_commit(${repeat/ref/index});">
               <p>
-                <strong>Author:</strong>
-                <tal:author replace="structure ref/author/fmt:link" />
-                <br />
+                <tal:has-author condition="ref/author">
+                  <strong>Author:</strong>
+                  <tal:author replace="structure ref/author/fmt:link" />
+                  <br />
+                </tal:has-author>
                 <strong>Author Date:</strong>
                 <tal:author replace="structure ref/author_date/fmt:datetime" />
               </p>