← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~cjwatson/launchpad:go-metadata-scanning into launchpad:master

 

Colin Watson has proposed merging ~cjwatson/launchpad:go-metadata-scanning into launchpad:master with ~cjwatson/launchpad:go-proxy-archive-layout as a prerequisite.

Commit message:
Implement metadata scanning for Go modules

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/426967

This also requires parsing `go.mod` files.  There doesn't seem to be an existing parser for these in Python, but the format is relatively well-specified, so I implemented a simple one using `pyparsing`.  It doesn't pretend to handle the whole file format, just enough for what we need.

I had to do some preliminary refactoring to cope with Go modules consisting of multiple files.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:go-metadata-scanning into launchpad:master.
diff --git a/lib/lp/registry/interfaces/sourcepackage.py b/lib/lp/registry/interfaces/sourcepackage.py
index 6a7e707..eac72fd 100644
--- a/lib/lp/registry/interfaces/sourcepackage.py
+++ b/lib/lp/registry/interfaces/sourcepackage.py
@@ -427,6 +427,24 @@ class SourcePackageFileType(DBEnumeratedType):
         This file is a Python source distribution ("sdist").
         """)
 
+    GO_MODULE_INFO = DBItem(12, """
+        Go Module Info
+
+        This file contains JSON metadata about a Go module.
+        """)
+
+    GO_MODULE_MOD = DBItem(13, """
+        go.mod
+
+        This is the `go.mod` file for a Go module.
+        """)
+
+    GO_MODULE_ZIP = DBItem(14, """
+        Zipped Go Module
+
+        This is the zipped contents of a Go module.
+        """)
+
 
 class SourcePackageType(DBEnumeratedType):
     """Source Package Format
diff --git a/lib/lp/soyuz/adapters/gomodparser.py b/lib/lp/soyuz/adapters/gomodparser.py
new file mode 100644
index 0000000..715d7c1
--- /dev/null
+++ b/lib/lp/soyuz/adapters/gomodparser.py
@@ -0,0 +1,116 @@
+# Copyright 2022 Canonical Ltd.  This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+"""Parser for go.mod files.
+
+These are specified in https://go.dev/ref/mod#go-mod-file.
+"""
+
+__all__ = [
+    "GoModParserException",
+    "parse_go_mod",
+]
+
+from contextlib import contextmanager
+import string
+from typing import Iterator
+
+import pyparsing as pp
+
+
+@contextmanager
+def pyparsing_whitespace_chars(chars: str) -> Iterator[None]:
+    """Temporarily override `pyparsing`'s default whitespace characters.
+
+    `pyparsing` lets us override the whitespace characters for a single parser
+    element, or globally.  We want to override them for all the elements in
+    this parser, but don't want to leave that state lying around for anything
+    else in the same process that might use `pyparsing`.
+    """
+    original_chars = pp.ParserElement.DEFAULT_WHITE_CHARS
+    try:
+        pp.ParserElement.setDefaultWhitespaceChars(chars)
+        yield
+    finally:
+        pp.ParserElement.setDefaultWhitespaceChars(original_chars)
+
+
+# "\n" is significant in go.mod files, so tell pyparsing not to treat it as
+# ordinary whitespace.
+@pyparsing_whitespace_chars(" \t\r")
+def make_go_mod_parser() -> pp.ParserElement:
+    lparen = pp.Literal("(")
+    rparen = pp.Literal(")")
+    newline = pp.Literal("\n")
+
+    comment = pp.dblSlashComment.copy()
+    ident = pp.Word(
+        # This seems very broad, but it appears to be what the specification
+        # calls for and what the official lexer in
+        # https://cs.opensource.google/go/x/mod/+/master:modfile/read.go
+        # does.
+        "".join(
+            c
+            for c in string.printable
+            if c not in string.whitespace + '()[]{},"`'
+        )
+    ).setName("identifier")
+    interpreted_string = pp.QuotedString(quoteChar='"', escChar="\\").setName(
+        "interpreted string"
+    )
+    raw_string = pp.QuotedString(quoteChar="`", multiline=True).setName(
+        "raw string"
+    )
+    quoted_string = interpreted_string | raw_string
+
+    module_keyword = pp.Keyword("module")
+    module_path = (ident | quoted_string).setResultsName("module_path")
+    module_directive = (
+        module_keyword
+        - (module_path | (lparen + newline + module_path + newline + rparen))
+        + newline
+    )
+
+    # The official EBNF for go.mod includes a number of other directives,
+    # but we aren't interested in those, and relying on having a current
+    # list of all the possible directives would mean that we'd have to keep
+    # updating this code as the syntax for go.mod is extended.  Instead, add
+    # some generic parser elements covering the general form of those other
+    # directives.  (The official parser in
+    # https://cs.opensource.google/go/x/mod/+/master:modfile/rule.go has a
+    # similar rationale for its "ParseLax" function.)
+    line_block = (
+        ident
+        + pp.nestedExpr(
+            opener="(", closer=")", ignoreExpr=comment | quoted_string
+        )
+        + newline
+    )
+    line = ident + pp.restOfLine + newline
+
+    return pp.ZeroOrMore(
+        module_directive | line_block | line | newline
+    ).ignore(comment)
+
+
+go_mod_parser = make_go_mod_parser()
+
+
+class GoModParserException(Exception):
+    pass
+
+
+def parse_go_mod(text: str) -> str:
+    """Return the module path declared in the text of a `go.mod` file.
+
+    Raises `GoModParserException` on a syntax error or missing `module`.
+    """
+    try:
+        parsed = go_mod_parser.parseString(text, parseAll=True)
+    except pp.ParseBaseException as e:  # pyparsing's text is too verbose
+        raise GoModParserException(
+            "Parse failed at line %d, column %d" % (e.lineno, e.column)
+        ) from e
+    if "module_path" not in parsed:
+        raise GoModParserException("No 'module' directive found")
+    return parsed["module_path"]
diff --git a/lib/lp/soyuz/adapters/tests/test_gomodparser.py b/lib/lp/soyuz/adapters/tests/test_gomodparser.py
new file mode 100644
index 0000000..0efdbc4
--- /dev/null
+++ b/lib/lp/soyuz/adapters/tests/test_gomodparser.py
@@ -0,0 +1,63 @@
+# Copyright 2022 Canonical Ltd.  This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+"""go.mod parser tests."""
+
+from lp.soyuz.adapters.gomodparser import (
+    GoModParserException,
+    parse_go_mod,
+    )
+from lp.testing import TestCase
+from lp.testing.layers import BaseLayer
+
+
+class TestParseGoMod(TestCase):
+
+    layer = BaseLayer
+
+    def test_module_identifier(self):
+        self.assertEqual(
+            "example.com/foo/bar", parse_go_mod("module example.com/foo/bar\n")
+        )
+
+    def test_module_interpreted_string(self):
+        self.assertEqual(
+            "example.com/foo/bar",
+            parse_go_mod('module "example\\.com\\/foo/bar"\n'),
+        )
+
+    def test_module_raw_string(self):
+        self.assertEqual(
+            "example.com/foo/bar",
+            parse_go_mod("module `example.com/foo/bar`\n"),
+        )
+
+    def test_ignores_other_directives(self):
+        self.assertEqual(
+            "foo",
+            parse_go_mod(
+                "module foo\n"
+                "\n"
+                "go 1.18\n"
+                "replace (\n"
+                "\txyz v1 => ./a\n"
+                "\txyz v2 => ./b\n"
+                ")\n"
+            ),
+        )
+
+    def test_parse_failed(self):
+        self.assertRaisesWithContent(
+            GoModParserException,
+            "Parse failed at line 1, column 9",
+            parse_go_mod,
+            "module (",
+        )
+
+    def test_no_module_directive(self):
+        self.assertRaisesWithContent(
+            GoModParserException,
+            "No 'module' directive found",
+            parse_go_mod,
+            "go 1.18\n",
+        )
diff --git a/lib/lp/soyuz/model/archivejob.py b/lib/lp/soyuz/model/archivejob.py
index ae038a3..1d3c281 100644
--- a/lib/lp/soyuz/model/archivejob.py
+++ b/lib/lp/soyuz/model/archivejob.py
@@ -5,7 +5,7 @@ from collections import OrderedDict
 import io
 import json
 import logging
-import os.path
+from pathlib import Path
 import tarfile
 import tempfile
 from typing import (
@@ -63,6 +63,10 @@ from lp.services.job.runner import BaseRunnableJob
 from lp.services.librarian.interfaces.client import LibrarianServerError
 from lp.services.librarian.utils import copy_and_close
 from lp.services.mail.sendmail import format_address_for_person
+from lp.soyuz.adapters.gomodparser import (
+    GoModParserException,
+    parse_go_mod,
+    )
 from lp.soyuz.enums import (
     ArchiveJobType,
     ArchiveRepositoryFormat,
@@ -298,6 +302,11 @@ class CIBuildUploadJob(ArchiveJobDerived):
             BinaryPackageFormat.CONDA_V1,
             BinaryPackageFormat.CONDA_V2,
             },
+        ArchiveRepositoryFormat.GO_PROXY: {
+            SourcePackageFileType.GO_MODULE_INFO,
+            SourcePackageFileType.GO_MODULE_MOD,
+            SourcePackageFileType.GO_MODULE_ZIP,
+            },
         }
 
     @classmethod
@@ -366,38 +375,48 @@ class CIBuildUploadJob(ArchiveJobDerived):
     def target_channel(self):
         return self.metadata["target_channel"]
 
-    def _scanWheel(self, path: str) -> Optional[BinaryArtifactMetadata]:
-        try:
-            parsed_path = parse_wheel_filename(path)
-            wheel = Wheel(path)
-        except Exception as e:
-            logger.warning(
-                "Failed to scan %s as a Python wheel: %s",
-                os.path.basename(path), e)
-            return None
-        return BinaryArtifactMetadata(
-            format=BinaryPackageFormat.WHL,
-            name=wheel.name,
-            version=wheel.version,
-            summary=wheel.summary or "",
-            description=wheel.description,
-            architecturespecific="any" not in parsed_path.platform_tags,
-            homepage=wheel.home_page or "",
-        )
+    def _scanWheel(self, paths: Iterable[Path]) -> Dict[str, ArtifactMetadata]:
+        all_metadata = {}
+        for path in paths:
+            if not path.name.endswith(".whl"):
+                continue
+            try:
+                parsed_path = parse_wheel_filename(str(path))
+                wheel = Wheel(str(path))
+            except Exception as e:
+                logger.warning(
+                    "Failed to scan %s as a Python wheel: %s", path.name, e)
+                continue
+            logger.info("%s is a Python wheel", path.name)
+            all_metadata[path.name] = BinaryArtifactMetadata(
+                format=BinaryPackageFormat.WHL,
+                name=wheel.name,
+                version=wheel.version,
+                summary=wheel.summary or "",
+                description=wheel.description,
+                architecturespecific="any" not in parsed_path.platform_tags,
+                homepage=wheel.home_page or "",
+            )
+        return all_metadata
 
-    def _scanSDist(self, path: str) -> Optional[SourceArtifactMetadata]:
-        try:
-            sdist = SDist(path)
-        except Exception as e:
-            logger.warning(
-                "Failed to scan %s as a Python sdist: %s",
-                os.path.basename(path), e)
-            return None
-        return SourceArtifactMetadata(
-            format=SourcePackageFileType.SDIST,
-            name=sdist.name,
-            version=sdist.version,
-        )
+    def _scanSDist(self, paths: Iterable[Path]) -> Dict[str, ArtifactMetadata]:
+        all_metadata = {}
+        for path in paths:
+            if not path.name.endswith((".tar.gz", ".zip")):
+                continue
+            try:
+                sdist = SDist(str(path))
+            except Exception as e:
+                logger.warning(
+                    "Failed to scan %s as a Python sdist: %s", path.name, e)
+                continue
+            logger.info("%s is a Python sdist", path.name)
+            all_metadata[path.name] = SourceArtifactMetadata(
+                format=SourcePackageFileType.SDIST,
+                name=sdist.name,
+                version=sdist.version,
+            )
+        return all_metadata
 
     def _scanCondaMetadata(
         self, format: BinaryPackageFormat, index: Dict[Any, Any],
@@ -418,61 +437,121 @@ class CIBuildUploadJob(ArchiveJobDerived):
             user_defined_fields=[("subdir", index["subdir"])],
         )
 
-    def _scanCondaV1(self, path: str) -> Optional[BinaryArtifactMetadata]:
-        try:
-            with tarfile.open(path) as tar:
-                index = json.loads(
-                    tar.extractfile("info/index.json").read().decode())
-                about = json.loads(
-                    tar.extractfile("info/about.json").read().decode())
-        except Exception as e:
-            logger.warning(
-                "Failed to scan %s as a Conda v1 package: %s",
-                os.path.basename(path), e)
-            return None
-        return self._scanCondaMetadata(
-            BinaryPackageFormat.CONDA_V1, index, about)
-
-    def _scanCondaV2(self, path: str) -> Optional[BinaryArtifactMetadata]:
-        try:
-            with zipfile.ZipFile(path) as zipf:
-                base_name = os.path.basename(path)[:-len(".conda")]
-                info = io.BytesIO()
-                with zipf.open("info-%s.tar.zst" % base_name) as raw_info:
-                    zstandard.ZstdDecompressor().copy_stream(raw_info, info)
-                info.seek(0)
-                with tarfile.open(fileobj=info) as tar:
+    def _scanCondaV1(
+        self, paths: Iterable[Path]
+    ) -> Dict[str, ArtifactMetadata]:
+        all_metadata = {}
+        for path in paths:
+            if not path.name.endswith(".tar.bz2"):
+                continue
+            try:
+                with tarfile.open(str(path)) as tar:
                     index = json.loads(
                         tar.extractfile("info/index.json").read().decode())
                     about = json.loads(
                         tar.extractfile("info/about.json").read().decode())
-        except Exception as e:
-            logger.warning(
-                "Failed to scan %s as a Conda v2 package: %s",
-                os.path.basename(path), e)
-            return None
-        return self._scanCondaMetadata(
-            BinaryPackageFormat.CONDA_V2, index, about)
-
-    def _scanFile(self, path: str) -> Optional[ArtifactMetadata]:
-        _scanners = (
-            (".whl", self._scanWheel),
-            (".tar.gz", self._scanSDist),
-            (".zip", self._scanSDist),
-            (".tar.bz2", self._scanCondaV1),
-            (".conda", self._scanCondaV2),
+            except Exception as e:
+                logger.warning(
+                    "Failed to scan %s as a Conda v1 package: %s",
+                    path.name, e)
+                continue
+            logger.info("%s is a Conda v1 package", path.name)
+            all_metadata[path.name] = self._scanCondaMetadata(
+                BinaryPackageFormat.CONDA_V1, index, about)
+        return all_metadata
+
+    def _scanCondaV2(
+        self, paths: Iterable[Path]
+    ) -> Dict[str, ArtifactMetadata]:
+        all_metadata = {}
+        for path in paths:
+            if not path.name.endswith(".conda"):
+                continue
+            try:
+                with zipfile.ZipFile(str(path)) as zipf:
+                    info = io.BytesIO()
+                    with zipf.open("info-%s.tar.zst" % path.stem) as raw_info:
+                        zstandard.ZstdDecompressor().copy_stream(
+                            raw_info, info)
+                    info.seek(0)
+                    with tarfile.open(fileobj=info) as tar:
+                        index = json.loads(
+                            tar.extractfile("info/index.json").read().decode())
+                        about = json.loads(
+                            tar.extractfile("info/about.json").read().decode())
+            except Exception as e:
+                logger.warning(
+                    "Failed to scan %s as a Conda v2 package: %s",
+                    path.name, e)
+                continue
+            logger.info("%s is a Conda v2 package", path.name)
+            all_metadata[path.name] = self._scanCondaMetadata(
+                BinaryPackageFormat.CONDA_V2, index, about)
+        return all_metadata
+
+    def _scanGoMod(self, paths: Iterable[Path]) -> Dict[str, ArtifactMetadata]:
+        """Scan for Go modules: sets of matching .info/.mod/.zip files."""
+        all_metadata = {}
+        for path in paths:
+            if not path.name.endswith(".mod"):
+                continue
+            info_path = path.parent / ("%s.info" % path.stem)
+            if not info_path.is_file():
+                logger.warning("%s has no corresponding .info file", path.name)
+                continue
+            zip_path = path.parent / ("%s.zip" % path.stem)
+            if not zip_path.is_file():
+                logger.warning("%s has no corresponding .zip file", path.name)
+                continue
+            try:
+                with open(str(info_path)) as info_file:
+                    version = json.load(info_file)["Version"]
+            except Exception as e:
+                logger.warning(
+                    "Failed to load Go module version from %s: %s",
+                    info_path.name, e)
+                continue  # don't reuse a stale or unbound version
+            try:
+                with open(str(path)) as mod_file:
+                    module_path = parse_go_mod(mod_file.read())
+            except GoModParserException as e:
+                logger.warning(
+                    "Failed to scan %s as a Go module: %s", path.name, e)
+                continue
+            logger.info("(%s, %s, %s) are a Go module",
+                        info_path.name, path.name, zip_path.name)
+            all_metadata[info_path.name] = SourceArtifactMetadata(
+                format=SourcePackageFileType.GO_MODULE_INFO,
+                name=module_path,
+                version=version,
             )
-        found_scanner = False
-        for suffix, scanner in _scanners:
-            if path.endswith(suffix):
-                found_scanner = True
-                scanned = scanner(path)
-                if scanned is not None:
-                    return scanned
-        else:
-            if not found_scanner:
-                logger.info("No upload handler for %s", os.path.basename(path))
-            return None
+            all_metadata[path.name] = SourceArtifactMetadata(
+                format=SourcePackageFileType.GO_MODULE_MOD,
+                name=module_path,
+                version=version,
+            )
+            all_metadata[zip_path.name] = SourceArtifactMetadata(
+                format=SourcePackageFileType.GO_MODULE_ZIP,
+                name=module_path,
+                version=version,
+            )
+        return all_metadata
+
+    def _scanFiles(self, directory: Path) -> Dict[str, ArtifactMetadata]:
+        """Scan every file in `directory`; map file names to metadata."""
+        scanners = (
+            self._scanWheel,
+            self._scanSDist,
+            self._scanCondaV1,
+            self._scanCondaV2,
+            self._scanGoMod,
+            )
+        paths = list(directory.iterdir())  # already includes directory
+        all_metadata = OrderedDict()
+        for scanner in scanners:
+            all_metadata.update(scanner(paths))  # then drop claimed files
+            paths = [path for path in paths if path.name not in all_metadata]
+        return all_metadata
 
     def _scanArtifacts(
         self, artifacts: Iterable[IRevisionStatusArtifact]
@@ -490,23 +569,26 @@ class CIBuildUploadJob(ArchiveJobDerived):
                 self.archive.repository_format, set()))
         scanned = []
         with tempfile.TemporaryDirectory(prefix="ci-build-copy-job") as tmpdir:
+            tmpdirpath = Path(tmpdir)
+            artifact_by_name = {}
             for artifact in artifacts:
                 if artifact.artifact_type == RevisionStatusArtifactType.LOG:
                     continue
                 name = artifact.library_file.filename
-                contents = os.path.join(tmpdir, name)
+                contents = str(tmpdirpath / name)
                 artifact.library_file.open()
                 copy_and_close(artifact.library_file, open(contents, "wb"))
-                metadata = self._scanFile(contents)
-                if metadata is None:
-                    continue
+                artifact_by_name[name] = artifact
+            all_metadata = self._scanFiles(tmpdirpath)
+            for name, metadata in all_metadata.items():
                 if metadata.format not in allowed_formats:
                     logger.info(
                         "Skipping %s (not relevant to %s archives)",
                         name, self.archive.repository_format)
                     continue
                 scanned.append(
-                    ScannedArtifact(artifact=artifact, metadata=metadata))
+                    ScannedArtifact(
+                        artifact=artifact_by_name[name], metadata=metadata))
         return scanned
 
     def _uploadSources(self, scanned: Sequence[ScannedArtifact]) -> None:
diff --git a/lib/lp/soyuz/tests/data/go/.launchpad.yaml b/lib/lp/soyuz/tests/data/go/.launchpad.yaml
new file mode 100644
index 0000000..a9c4ba2
--- /dev/null
+++ b/lib/lp/soyuz/tests/data/go/.launchpad.yaml
@@ -0,0 +1,19 @@
+pipeline:
+    - build
+
+jobs:
+    build:
+        series: focal
+        architectures: amd64
+        packages:
+            - zip
+        run: |
+            mkdir -p out
+            zip -r out/v0.0.1.zip t.go
+            cp -a go.mod out/v0.0.1.mod
+            echo '{"Version": "v0.0.1"}' >out/v0.0.1.info
+        output:
+            paths:
+                - out/*.info
+                - out/*.mod
+                - out/*.zip
diff --git a/lib/lp/soyuz/tests/data/go/dist/v0.0.1.info b/lib/lp/soyuz/tests/data/go/dist/v0.0.1.info
new file mode 100644
index 0000000..d570cb5
--- /dev/null
+++ b/lib/lp/soyuz/tests/data/go/dist/v0.0.1.info
@@ -0,0 +1 @@
+{"Version": "v0.0.1"}
diff --git a/lib/lp/soyuz/tests/data/go/dist/v0.0.1.mod b/lib/lp/soyuz/tests/data/go/dist/v0.0.1.mod
new file mode 100644
index 0000000..649f9bf
--- /dev/null
+++ b/lib/lp/soyuz/tests/data/go/dist/v0.0.1.mod
@@ -0,0 +1,3 @@
+module example.com/t
+
+go 1.18
diff --git a/lib/lp/soyuz/tests/data/go/dist/v0.0.1.zip b/lib/lp/soyuz/tests/data/go/dist/v0.0.1.zip
new file mode 100644
index 0000000..d3ff66a
Binary files /dev/null and b/lib/lp/soyuz/tests/data/go/dist/v0.0.1.zip differ
diff --git a/lib/lp/soyuz/tests/data/go/go.mod b/lib/lp/soyuz/tests/data/go/go.mod
new file mode 100644
index 0000000..649f9bf
--- /dev/null
+++ b/lib/lp/soyuz/tests/data/go/go.mod
@@ -0,0 +1,3 @@
+module example.com/t
+
+go 1.18
diff --git a/lib/lp/soyuz/tests/data/go/t.go b/lib/lp/soyuz/tests/data/go/t.go
new file mode 100644
index 0000000..edbafea
--- /dev/null
+++ b/lib/lp/soyuz/tests/data/go/t.go
@@ -0,0 +1 @@
+package t
diff --git a/lib/lp/soyuz/tests/test_archivejob.py b/lib/lp/soyuz/tests/test_archivejob.py
index 813b423..e5b29c2 100644
--- a/lib/lp/soyuz/tests/test_archivejob.py
+++ b/lib/lp/soyuz/tests/test_archivejob.py
@@ -2,17 +2,21 @@
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 import os.path
+from pathlib import Path
+import shutil
 
 from debian.deb822 import Changes
 from fixtures import (
     FakeLogger,
     MockPatch,
     MockPatchObject,
+    TempDir,
     )
 from testtools.matchers import (
     ContainsDict,
     Equals,
     Is,
+    MatchesDict,
     MatchesSetwise,
     MatchesStructure,
     )
@@ -46,10 +50,8 @@ from lp.soyuz.enums import (
 from lp.soyuz.model.archivejob import (
     ArchiveJob,
     ArchiveJobDerived,
-    BinaryArtifactMetadata,
     CIBuildUploadJob,
     PackageUploadNotificationJob,
-    SourceArtifactMetadata,
     )
 from lp.soyuz.tests import datadir
 from lp.testing import (
@@ -240,7 +242,7 @@ class TestCIBuildUploadJob(TestCaseWithFactory):
         self.assertEqual(PackagePublishingPocket.RELEASE, job.target_pocket)
         self.assertEqual("edge", job.target_channel)
 
-    def test__scanFile_wheel_indep(self):
+    def test__scanFiles_wheel_indep(self):
         archive = self.factory.makeArchive()
         distroseries = self.factory.makeDistroSeries(
             distribution=archive.distribution)
@@ -248,22 +250,25 @@ class TestCIBuildUploadJob(TestCaseWithFactory):
         job = CIBuildUploadJob.create(
             build, build.git_repository.owner, archive, distroseries,
             PackagePublishingPocket.RELEASE, target_channel="edge")
-        path = "wheel-indep/dist/wheel_indep-0.0.1-py3-none-any.whl"
-        metadata = job._scanFile(datadir(path))
-        self.assertIsInstance(metadata, BinaryArtifactMetadata)
+        path = Path("wheel-indep/dist/wheel_indep-0.0.1-py3-none-any.whl")
+        tmpdir = Path(self.useFixture(TempDir()).path)
+        shutil.copy2(datadir(str(path)), str(tmpdir))
+        all_metadata = job._scanFiles(tmpdir)
         self.assertThat(
-            metadata,
-            MatchesStructure(
-                format=Equals(BinaryPackageFormat.WHL),
-                name=Equals("wheel-indep"),
-                version=Equals("0.0.1"),
-                summary=Equals("Example description"),
-                description=Equals("Example long description\n"),
-                architecturespecific=Is(False),
-                homepage=Equals(""),
-                ))
-
-    def test__scanFile_wheel_arch(self):
+            all_metadata,
+            MatchesDict({
+                path.name: MatchesStructure(
+                    format=Equals(BinaryPackageFormat.WHL),
+                    name=Equals("wheel-indep"),
+                    version=Equals("0.0.1"),
+                    summary=Equals("Example description"),
+                    description=Equals("Example long description\n"),
+                    architecturespecific=Is(False),
+                    homepage=Equals(""),
+                    ),
+                }))
+
+    def test__scanFiles_wheel_arch(self):
         archive = self.factory.makeArchive()
         distroseries = self.factory.makeDistroSeries(
             distribution=archive.distribution)
@@ -271,22 +276,26 @@ class TestCIBuildUploadJob(TestCaseWithFactory):
         job = CIBuildUploadJob.create(
             build, build.git_repository.owner, archive, distroseries,
             PackagePublishingPocket.RELEASE, target_channel="edge")
-        path = "wheel-arch/dist/wheel_arch-0.0.1-cp310-cp310-linux_x86_64.whl"
-        metadata = job._scanFile(datadir(path))
-        self.assertIsInstance(metadata, BinaryArtifactMetadata)
+        path = Path(
+            "wheel-arch/dist/wheel_arch-0.0.1-cp310-cp310-linux_x86_64.whl")
+        tmpdir = Path(self.useFixture(TempDir()).path)
+        shutil.copy2(datadir(str(path)), str(tmpdir))
+        all_metadata = job._scanFiles(tmpdir)
         self.assertThat(
-            metadata,
-            MatchesStructure(
-                format=Equals(BinaryPackageFormat.WHL),
-                name=Equals("wheel-arch"),
-                version=Equals("0.0.1"),
-                summary=Equals("Example description"),
-                description=Equals("Example long description\n"),
-                architecturespecific=Is(True),
-                homepage=Equals("http://example.com/"),
-                ))
-
-    def test__scanFile_sdist(self):
+            all_metadata,
+            MatchesDict({
+                path.name: MatchesStructure(
+                    format=Equals(BinaryPackageFormat.WHL),
+                    name=Equals("wheel-arch"),
+                    version=Equals("0.0.1"),
+                    summary=Equals("Example description"),
+                    description=Equals("Example long description\n"),
+                    architecturespecific=Is(True),
+                    homepage=Equals("http://example.com/"),
+                    ),
+                }))
+
+    def test__scanFiles_sdist(self):
         archive = self.factory.makeArchive()
         distroseries = self.factory.makeDistroSeries(
             distribution=archive.distribution)
@@ -294,18 +303,21 @@ class TestCIBuildUploadJob(TestCaseWithFactory):
         job = CIBuildUploadJob.create(
             build, build.git_repository.owner, archive, distroseries,
             PackagePublishingPocket.RELEASE, target_channel="edge")
-        path = "wheel-arch/dist/wheel-arch-0.0.1.tar.gz"
-        metadata = job._scanFile(datadir(path))
-        self.assertIsInstance(metadata, SourceArtifactMetadata)
+        path = Path("wheel-arch/dist/wheel-arch-0.0.1.tar.gz")
+        tmpdir = Path(self.useFixture(TempDir()).path)
+        shutil.copy2(datadir(str(path)), str(tmpdir))
+        all_metadata = job._scanFiles(tmpdir)
         self.assertThat(
-            metadata,
-            MatchesStructure.byEquality(
-                format=SourcePackageFileType.SDIST,
-                name="wheel-arch",
-                version="0.0.1",
-                ))
-
-    def test__scanFile_conda_indep(self):
+            all_metadata,
+            MatchesDict({
+                path.name: MatchesStructure.byEquality(
+                    format=SourcePackageFileType.SDIST,
+                    name="wheel-arch",
+                    version="0.0.1",
+                    ),
+                }))
+
+    def test__scanFiles_conda_indep(self):
         archive = self.factory.makeArchive()
         distroseries = self.factory.makeDistroSeries(
             distribution=archive.distribution)
@@ -313,23 +325,26 @@ class TestCIBuildUploadJob(TestCaseWithFactory):
         job = CIBuildUploadJob.create(
             build, build.git_repository.owner, archive, distroseries,
             PackagePublishingPocket.RELEASE, target_channel="edge")
-        path = "conda-indep/dist/noarch/conda-indep-0.1-0.tar.bz2"
-        metadata = job._scanFile(datadir(path))
-        self.assertIsInstance(metadata, BinaryArtifactMetadata)
+        path = Path("conda-indep/dist/noarch/conda-indep-0.1-0.tar.bz2")
+        tmpdir = Path(self.useFixture(TempDir()).path)
+        shutil.copy2(datadir(str(path)), str(tmpdir))
+        all_metadata = job._scanFiles(tmpdir)
         self.assertThat(
-            metadata,
-            MatchesStructure(
-                format=Equals(BinaryPackageFormat.CONDA_V1),
-                name=Equals("conda-indep"),
-                version=Equals("0.1"),
-                summary=Equals("Example summary"),
-                description=Equals("Example description"),
-                architecturespecific=Is(False),
-                homepage=Equals(""),
-                user_defined_fields=Equals([("subdir", "noarch")]),
-                ))
-
-    def test__scanFile_conda_arch(self):
+            all_metadata,
+            MatchesDict({
+                path.name: MatchesStructure(
+                    format=Equals(BinaryPackageFormat.CONDA_V1),
+                    name=Equals("conda-indep"),
+                    version=Equals("0.1"),
+                    summary=Equals("Example summary"),
+                    description=Equals("Example description"),
+                    architecturespecific=Is(False),
+                    homepage=Equals(""),
+                    user_defined_fields=Equals([("subdir", "noarch")]),
+                    ),
+                }))
+
+    def test__scanFiles_conda_arch(self):
         archive = self.factory.makeArchive()
         distroseries = self.factory.makeDistroSeries(
             distribution=archive.distribution)
@@ -337,23 +352,26 @@ class TestCIBuildUploadJob(TestCaseWithFactory):
         job = CIBuildUploadJob.create(
             build, build.git_repository.owner, archive, distroseries,
             PackagePublishingPocket.RELEASE, target_channel="edge")
-        path = "conda-arch/dist/linux-64/conda-arch-0.1-0.tar.bz2"
-        metadata = job._scanFile(datadir(path))
-        self.assertIsInstance(metadata, BinaryArtifactMetadata)
+        path = Path("conda-arch/dist/linux-64/conda-arch-0.1-0.tar.bz2")
+        tmpdir = Path(self.useFixture(TempDir()).path)
+        shutil.copy2(datadir(str(path)), str(tmpdir))
+        all_metadata = job._scanFiles(tmpdir)
         self.assertThat(
-            metadata,
-            MatchesStructure(
-                format=Equals(BinaryPackageFormat.CONDA_V1),
-                name=Equals("conda-arch"),
-                version=Equals("0.1"),
-                summary=Equals("Example summary"),
-                description=Equals("Example description"),
-                architecturespecific=Is(True),
-                homepage=Equals("http://example.com/"),
-                user_defined_fields=Equals([("subdir", "linux-64")]),
-                ))
-
-    def test__scanFile_conda_v2_indep(self):
+            all_metadata,
+            MatchesDict({
+                path.name: MatchesStructure(
+                    format=Equals(BinaryPackageFormat.CONDA_V1),
+                    name=Equals("conda-arch"),
+                    version=Equals("0.1"),
+                    summary=Equals("Example summary"),
+                    description=Equals("Example description"),
+                    architecturespecific=Is(True),
+                    homepage=Equals("http://example.com/"),
+                    user_defined_fields=Equals([("subdir", "linux-64")]),
+                    ),
+                }))
+
+    def test__scanFiles_conda_v2_indep(self):
         archive = self.factory.makeArchive()
         distroseries = self.factory.makeDistroSeries(
             distribution=archive.distribution)
@@ -361,23 +379,26 @@ class TestCIBuildUploadJob(TestCaseWithFactory):
         job = CIBuildUploadJob.create(
             build, build.git_repository.owner, archive, distroseries,
             PackagePublishingPocket.RELEASE, target_channel="edge")
-        path = "conda-v2-indep/dist/noarch/conda-v2-indep-0.1-0.conda"
-        metadata = job._scanFile(datadir(path))
-        self.assertIsInstance(metadata, BinaryArtifactMetadata)
+        path = Path("conda-v2-indep/dist/noarch/conda-v2-indep-0.1-0.conda")
+        tmpdir = Path(self.useFixture(TempDir()).path)
+        shutil.copy2(datadir(str(path)), str(tmpdir))
+        all_metadata = job._scanFiles(tmpdir)
         self.assertThat(
-            metadata,
-            MatchesStructure(
-                format=Equals(BinaryPackageFormat.CONDA_V2),
-                name=Equals("conda-v2-indep"),
-                version=Equals("0.1"),
-                summary=Equals("Example summary"),
-                description=Equals("Example description"),
-                architecturespecific=Is(False),
-                homepage=Equals(""),
-                user_defined_fields=Equals([("subdir", "noarch")]),
-                ))
-
-    def test__scanFile_conda_v2_arch(self):
+            all_metadata,
+            MatchesDict({
+                path.name: MatchesStructure(
+                    format=Equals(BinaryPackageFormat.CONDA_V2),
+                    name=Equals("conda-v2-indep"),
+                    version=Equals("0.1"),
+                    summary=Equals("Example summary"),
+                    description=Equals("Example description"),
+                    architecturespecific=Is(False),
+                    homepage=Equals(""),
+                    user_defined_fields=Equals([("subdir", "noarch")]),
+                    ),
+                }))
+
+    def test__scanFiles_conda_v2_arch(self):
         archive = self.factory.makeArchive()
         distroseries = self.factory.makeDistroSeries(
             distribution=archive.distribution)
@@ -385,21 +406,61 @@ class TestCIBuildUploadJob(TestCaseWithFactory):
         job = CIBuildUploadJob.create(
             build, build.git_repository.owner, archive, distroseries,
             PackagePublishingPocket.RELEASE, target_channel="edge")
-        path = "conda-v2-arch/dist/linux-64/conda-v2-arch-0.1-0.conda"
-        metadata = job._scanFile(datadir(path))
-        self.assertIsInstance(metadata, BinaryArtifactMetadata)
+        path = Path("conda-v2-arch/dist/linux-64/conda-v2-arch-0.1-0.conda")
+        tmpdir = Path(self.useFixture(TempDir()).path)
+        shutil.copy2(datadir(str(path)), str(tmpdir))
+        all_metadata = job._scanFiles(tmpdir)
         self.assertThat(
-            metadata,
-            MatchesStructure(
-                format=Equals(BinaryPackageFormat.CONDA_V2),
-                name=Equals("conda-v2-arch"),
-                version=Equals("0.1"),
-                summary=Equals("Example summary"),
-                description=Equals("Example description"),
-                architecturespecific=Is(True),
-                homepage=Equals("http://example.com/"),
-                user_defined_fields=Equals([("subdir", "linux-64")]),
-                ))
+            all_metadata,
+            MatchesDict({
+                path.name: MatchesStructure(
+                    format=Equals(BinaryPackageFormat.CONDA_V2),
+                    name=Equals("conda-v2-arch"),
+                    version=Equals("0.1"),
+                    summary=Equals("Example summary"),
+                    description=Equals("Example description"),
+                    architecturespecific=Is(True),
+                    homepage=Equals("http://example.com/"),
+                    user_defined_fields=Equals([("subdir", "linux-64")]),
+                    ),
+                }))
+
+    def test__scanFiles_go(self):
+        self.useFixture(FakeLogger())
+        archive = self.factory.makeArchive()
+        distroseries = self.factory.makeDistroSeries(
+            distribution=archive.distribution)
+        build = self.makeCIBuild(archive.distribution)
+        job = CIBuildUploadJob.create(
+            build, build.git_repository.owner, archive, distroseries,
+            PackagePublishingPocket.RELEASE, target_channel="edge")
+        info_path = Path("go/dist/v0.0.1.info")
+        mod_path = Path("go/dist/v0.0.1.mod")
+        zip_path = Path("go/dist/v0.0.1.zip")
+        tmpdir = Path(self.useFixture(TempDir()).path)
+        shutil.copy2(datadir(str(info_path)), str(tmpdir))
+        shutil.copy2(datadir(str(mod_path)), str(tmpdir))
+        shutil.copy2(datadir(str(zip_path)), str(tmpdir))
+        all_metadata = job._scanFiles(tmpdir)
+        self.assertThat(
+            all_metadata,
+            MatchesDict({
+                info_path.name: MatchesStructure.byEquality(
+                    format=SourcePackageFileType.GO_MODULE_INFO,
+                    name="example.com/t",
+                    version="v0.0.1",
+                    ),
+                mod_path.name: MatchesStructure.byEquality(
+                    format=SourcePackageFileType.GO_MODULE_MOD,
+                    name="example.com/t",
+                    version="v0.0.1",
+                    ),
+                zip_path.name: MatchesStructure.byEquality(
+                    format=SourcePackageFileType.GO_MODULE_ZIP,
+                    name="example.com/t",
+                    version="v0.0.1",
+                    ),
+                }))
 
     def test_run_indep(self):
         archive = self.factory.makeArchive(
@@ -661,6 +722,63 @@ class TestCIBuildUploadJob(TestCaseWithFactory):
                 binarypackageformat=Equals(BinaryPackageFormat.CONDA_V2),
                 distroarchseries=Equals(dases[0]))))
 
+    def test_run_go(self):
+        self.useFixture(FakeLogger())
+        archive = self.factory.makeArchive(
+            repository_format=ArchiveRepositoryFormat.GO_PROXY)
+        distroseries = self.factory.makeDistroSeries(
+            distribution=archive.distribution)
+        dases = [
+            self.factory.makeDistroArchSeries(distroseries=distroseries)
+            for _ in range(2)]
+        build = self.makeCIBuild(
+            archive.distribution, distro_arch_series=dases[0])
+        report = build.getOrCreateRevisionStatusReport("build:0")
+        report.setLog(b"log data")
+        info_path = "go/dist/v0.0.1.info"
+        mod_path = "go/dist/v0.0.1.mod"
+        zip_path = "go/dist/v0.0.1.zip"
+        for path in (info_path, mod_path, zip_path):
+            with open(datadir(path), mode="rb") as f:
+                report.attach(name=os.path.basename(path), data=f.read())
+        artifacts = IStore(RevisionStatusArtifact).find(
+            RevisionStatusArtifact,
+            report=report,
+            artifact_type=RevisionStatusArtifactType.BINARY).order_by("id")
+        job = CIBuildUploadJob.create(
+            build, build.git_repository.owner, archive, distroseries,
+            PackagePublishingPocket.RELEASE, target_channel="edge")
+        transaction.commit()
+
+        with dbuser(job.config.dbuser):
+            JobRunner([job]).runAll()
+
+        self.assertThat(archive.getPublishedSources(), MatchesSetwise(
+            MatchesStructure(
+                sourcepackagename=MatchesStructure.byEquality(
+                    name=build.git_repository.target.name),
+                sourcepackagerelease=MatchesStructure(
+                    ci_build=Equals(build),
+                    sourcepackagename=MatchesStructure.byEquality(
+                        name=build.git_repository.target.name),
+                    version=Equals("v0.0.1"),
+                    format=Equals(SourcePackageType.CI_BUILD),
+                    architecturehintlist=Equals(""),
+                    creator=Equals(build.git_repository.owner),
+                    files=MatchesSetwise(
+                        MatchesStructure.byEquality(
+                            libraryfile=artifacts[0].library_file,
+                            filetype=SourcePackageFileType.GO_MODULE_INFO),
+                        MatchesStructure.byEquality(
+                            libraryfile=artifacts[1].library_file,
+                            filetype=SourcePackageFileType.GO_MODULE_MOD),
+                        MatchesStructure.byEquality(
+                            libraryfile=artifacts[2].library_file,
+                            filetype=SourcePackageFileType.GO_MODULE_ZIP))),
+                format=Equals(SourcePackageType.CI_BUILD),
+                distroseries=Equals(distroseries))))
+        self.assertContentEqual([], archive.getAllPublishedBinaries())
+
     def test_existing_source_and_binary_releases(self):
         # A `CIBuildUploadJob` can be run even if the build in question was
         # already uploaded somewhere, and in that case may add publications
diff --git a/setup.cfg b/setup.cfg
index 523364e..c6c8654 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -77,6 +77,7 @@ install_requires =
     pygpgme
     pymacaroons
     pymemcache
+    pyparsing
     pystache
     python-debian
     python-keystoneclient