From db0111681ba7ece928c71b3b8d98177dbbb37995 Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Fri, 22 May 2026 13:41:52 +0100 Subject: [PATCH] golang: use pkg.go.dev v1beta API for package and version metadata When using the default proxy, FetchPackage now hits /v1beta/module which returns the resolved repository URL (correct for vanity imports), SPDX license types, and the latest version. FetchVersions hits /v1beta/versions which returns commit times and retracted/deprecated flags in a single paginated response, replacing the N+1 .info calls against the proxy. Any pkgsite failure (including 404, since pkg.go.dev can lag the proxy) falls through to the existing goproxy protocol path. Custom proxies skip pkgsite entirely. --- internal/golang/golang.go | 161 +++++++++++++++++++++++++++++---- internal/golang/golang_test.go | 158 ++++++++++++++++++++++++++++++++ 2 files changed, 303 insertions(+), 16 deletions(-) diff --git a/internal/golang/golang.go b/internal/golang/golang.go index 3904224..52c5bf1 100644 --- a/internal/golang/golang.go +++ b/internal/golang/golang.go @@ -5,6 +5,7 @@ import ( "context" "encoding/json" "fmt" + "net/url" "strings" "time" "unicode" @@ -15,7 +16,10 @@ import ( const ( DefaultURL = "https://proxy.golang.org" + pkgsiteAPI = "https://pkg.go.dev/v1beta" ecosystem = "golang" + + pkgsitePageLimit = 500 ) func init() { @@ -25,9 +29,10 @@ func init() { } type Registry struct { - baseURL string - client *core.Client - urls *URLs + baseURL string + pkgsiteURL string + client *core.Client + urls *URLs } func New(baseURL string, client *core.Client) *Registry { @@ -38,6 +43,11 @@ func New(baseURL string, client *core.Client) *Registry { baseURL: strings.TrimSuffix(baseURL, "/"), client: client, } + // The pkgsite API only covers modules visible on pkg.go.dev. When pointed + // at a private proxy we fall back to the goproxy protocol exclusively. + if r.baseURL == DefaultURL { + r.pkgsiteURL = pkgsiteAPI + } r.urls = &URLs{baseURL: r.baseURL} return r } @@ -55,6 +65,29 @@ type versionInfo struct { Time time.Time `json:"Time"` } +type pkgsiteLicense struct { + Types []string `json:"types"` +} + +type pkgsiteModule struct { + Path string `json:"path"` + Version string `json:"version"` + RepoURL string `json:"repoUrl"` + Licenses []pkgsiteLicense `json:"licenses"` +} + +type pkgsiteVersion struct { + Version string `json:"version"` + CommitTime time.Time `json:"commitTime"` + Deprecated bool `json:"deprecated"` + Retracted bool `json:"retracted"` +} + +type pkgsiteVersions struct { + Items []pkgsiteVersion `json:"items"` + NextPageToken string `json:"nextPageToken"` +} + // encodeForProxy encodes a module path according to the goproxy protocol. // Capital letters are replaced with "!" followed by the lowercase letter. // https://go.dev/ref/mod#goproxy-protocol @@ -72,13 +105,56 @@ func encodeForProxy(path string) string { } func (r *Registry) FetchPackage(ctx context.Context, name string) (*core.Package, error) { + if r.pkgsiteURL != "" { + if pkg, err := r.fetchPackagePkgsite(ctx, name); err == nil { + return pkg, nil + } + // Any pkgsite failure (including 404, since pkg.go.dev can lag the + // proxy) falls through to the goproxy protocol. + } + return r.fetchPackageProxy(ctx, name) +} + +func (r *Registry) fetchPackagePkgsite(ctx context.Context, name string) (*core.Package, error) { + moduleURL := fmt.Sprintf("%s/module/%s?licenses=true", r.pkgsiteURL, name) + + var mod pkgsiteModule + if err := r.client.GetJSON(ctx, moduleURL, &mod); err != nil { + return nil, err + } + + repoURL := urlparser.Parse(mod.RepoURL) + if repoURL == "" { + // pkgsite already returns a validated URL; keep it even if urlparser + // doesn't recognise the host (e.g. go.googlesource.com, gitea instances). + repoURL = mod.RepoURL + } + if repoURL == "" { + repoURL = urlparser.Parse(deriveRepoURL(name)) + } + + var types []string + for _, l := range mod.Licenses { + types = append(types, l.Types...) + } + + return &core.Package{ + Name: name, + Repository: repoURL, + Homepage: repoURL, + Namespace: namespaceFor(name), + Licenses: strings.Join(types, ", "), + LatestVersion: mod.Version, + }, nil +} + +func (r *Registry) fetchPackageProxy(ctx context.Context, name string) (*core.Package, error) { encoded := encodeForProxy(name) - // Try to get the version list first to verify the module exists listURL := fmt.Sprintf("%s/%s/@v/list", r.baseURL, encoded) body, err := r.client.GetText(ctx, listURL) if err != nil { - if httpErr, ok := err.(*core.HTTPError); ok && httpErr.IsNotFound() { + if isNotFound(err) { return nil, &core.NotFoundError{Ecosystem: ecosystem, Name: name} } return nil, err @@ -88,24 +164,28 @@ func (r *Registry) FetchPackage(ctx context.Context, name string) (*core.Package return nil, &core.NotFoundError{Ecosystem: ecosystem, Name: name} } - // Go modules don't have rich metadata in the proxy protocol - // The repository URL is typically derived from the module path repoURL := urlparser.Parse(deriveRepoURL(name)) - parts := strings.Split(name, "/") - namespace := "" - if len(parts) > 1 { - namespace = strings.Join(parts[:len(parts)-1], "/") - } - return &core.Package{ Name: name, Repository: repoURL, Homepage: repoURL, - Namespace: namespace, + Namespace: namespaceFor(name), }, nil } +func namespaceFor(name string) string { + if i := strings.LastIndex(name, "/"); i > 0 { + return name[:i] + } + return "" +} + +func isNotFound(err error) bool { + httpErr, ok := err.(*core.HTTPError) + return ok && httpErr.IsNotFound() +} + func deriveRepoURL(modulePath string) string { // Common hosting platforms if strings.HasPrefix(modulePath, "github.com/") || @@ -122,12 +202,61 @@ func deriveRepoURL(modulePath string) string { } func (r *Registry) FetchVersions(ctx context.Context, name string) ([]core.Version, error) { + if r.pkgsiteURL != "" { + if versions, err := r.fetchVersionsPkgsite(ctx, name); err == nil { + return versions, nil + } + } + return r.fetchVersionsProxy(ctx, name) +} + +func (r *Registry) fetchVersionsPkgsite(ctx context.Context, name string) ([]core.Version, error) { + var versions []core.Version + token := "" + + for { + q := url.Values{"limit": {fmt.Sprint(pkgsitePageLimit)}} + if token != "" { + q.Set("token", token) + } + pageURL := fmt.Sprintf("%s/versions/%s?%s", r.pkgsiteURL, name, q.Encode()) + + var page pkgsiteVersions + if err := r.client.GetJSON(ctx, pageURL, &page); err != nil { + return nil, err + } + + for _, v := range page.Items { + status := core.StatusNone + switch { + case v.Retracted: + status = core.StatusRetracted + case v.Deprecated: + status = core.StatusDeprecated + } + versions = append(versions, core.Version{ + Number: v.Version, + PublishedAt: v.CommitTime, + Status: status, + }) + } + + if page.NextPageToken == "" { + break + } + token = page.NextPageToken + } + + return versions, nil +} + +func (r *Registry) fetchVersionsProxy(ctx context.Context, name string) ([]core.Version, error) { encoded := encodeForProxy(name) listURL := fmt.Sprintf("%s/%s/@v/list", r.baseURL, encoded) body, err := r.client.GetText(ctx, listURL) if err != nil { - if httpErr, ok := err.(*core.HTTPError); ok && httpErr.IsNotFound() { + if isNotFound(err) { return nil, &core.NotFoundError{Ecosystem: ecosystem, Name: name} } return nil, err @@ -167,7 +296,7 @@ func (r *Registry) FetchDependencies(ctx context.Context, name, version string) body, err := r.client.GetText(ctx, modURL) if err != nil { - if httpErr, ok := err.(*core.HTTPError); ok && httpErr.IsNotFound() { + if isNotFound(err) { return nil, &core.NotFoundError{Ecosystem: ecosystem, Name: name, Version: version} } return nil, err diff --git a/internal/golang/golang_test.go b/internal/golang/golang_test.go index 9c02178..4513556 100644 --- a/internal/golang/golang_test.go +++ b/internal/golang/golang_test.go @@ -38,6 +38,76 @@ func TestFetchPackage(t *testing.T) { } } +func TestFetchPackagePkgsite(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/module/golang.org/x/sync" { + w.WriteHeader(404) + return + } + if r.URL.Query().Get("licenses") != "true" { + t.Errorf("expected licenses=true query param") + } + _ = json.NewEncoder(w).Encode(pkgsiteModule{ + Path: "golang.org/x/sync", + Version: "v0.7.0", + RepoURL: "https://go.googlesource.com/sync", + Licenses: []pkgsiteLicense{ + {Types: []string{"BSD-3-Clause"}}, + }, + }) + })) + defer server.Close() + + reg := New("", core.DefaultClient()) + reg.pkgsiteURL = server.URL + + pkg, err := reg.FetchPackage(context.Background(), "golang.org/x/sync") + if err != nil { + t.Fatalf("FetchPackage failed: %v", err) + } + + if pkg.Repository != "https://go.googlesource.com/sync" { + t.Errorf("unexpected repository: %q", pkg.Repository) + } + if pkg.Licenses != "BSD-3-Clause" { + t.Errorf("unexpected licenses: %q", pkg.Licenses) + } + if pkg.LatestVersion != "v0.7.0" { + t.Errorf("unexpected latest version: %q", pkg.LatestVersion) + } + if pkg.Namespace != "golang.org/x" { + t.Errorf("unexpected namespace: %q", pkg.Namespace) + } +} + +func TestFetchPackagePkgsiteFallback(t *testing.T) { + // pkgsite returns 500, proxy succeeds + proxy := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/github.com/gorilla/mux/@v/list" { + _, _ = w.Write([]byte("v1.8.0\n")) + return + } + w.WriteHeader(404) + })) + defer proxy.Close() + + pkgsite := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(500) + })) + defer pkgsite.Close() + + reg := New(proxy.URL, core.NewClient(core.WithMaxRetries(0))) + reg.pkgsiteURL = pkgsite.URL + + pkg, err := reg.FetchPackage(context.Background(), "github.com/gorilla/mux") + if err != nil { + t.Fatalf("FetchPackage should fall back to proxy: %v", err) + } + if pkg.Repository != "https://github.com/gorilla/mux" { + t.Errorf("unexpected repository: %q", pkg.Repository) + } +} + func TestFetchPackageNotFound(t *testing.T) { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(404) @@ -87,6 +157,94 @@ func TestFetchVersions(t *testing.T) { } } +func TestFetchVersionsPkgsite(t *testing.T) { + page1 := pkgsiteVersions{ + Items: []pkgsiteVersion{ + {Version: "v1.2.0", CommitTime: time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC)}, + {Version: "v1.1.0", CommitTime: time.Date(2023, 6, 1, 0, 0, 0, 0, time.UTC), Retracted: true}, + }, + NextPageToken: "abc", + } + page2 := pkgsiteVersions{ + Items: []pkgsiteVersion{ + {Version: "v1.0.0", CommitTime: time.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC), Deprecated: true}, + }, + } + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/versions/example.com/mod" { + w.WriteHeader(404) + return + } + if r.URL.Query().Get("token") == "abc" { + _ = json.NewEncoder(w).Encode(page2) + } else { + _ = json.NewEncoder(w).Encode(page1) + } + })) + defer server.Close() + + reg := New("", core.DefaultClient()) + reg.pkgsiteURL = server.URL + + versions, err := reg.FetchVersions(context.Background(), "example.com/mod") + if err != nil { + t.Fatalf("FetchVersions failed: %v", err) + } + + if len(versions) != 3 { + t.Fatalf("expected 3 versions across 2 pages, got %d", len(versions)) + } + if versions[0].Number != "v1.2.0" || versions[0].Status != core.StatusNone { + t.Errorf("unexpected version[0]: %+v", versions[0]) + } + if versions[1].Number != "v1.1.0" || versions[1].Status != core.StatusRetracted { + t.Errorf("expected v1.1.0 retracted, got %+v", versions[1]) + } + if versions[2].Number != "v1.0.0" || versions[2].Status != core.StatusDeprecated { + t.Errorf("expected v1.0.0 deprecated, got %+v", versions[2]) + } + if versions[0].PublishedAt.Year() != 2024 { + t.Errorf("expected commitTime to populate PublishedAt, got %v", versions[0].PublishedAt) + } +} + +func TestFetchVersionsPkgsiteNotFoundFallsThrough(t *testing.T) { + pkgsite := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(404) + _, _ = w.Write([]byte(`{"code":404,"message":"not found"}`)) + })) + defer pkgsite.Close() + + proxy := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/example.com/fresh/@v/list": + _, _ = w.Write([]byte("v0.1.0\n")) + case "/example.com/fresh/@v/v0.1.0.info": + _ = json.NewEncoder(w).Encode(versionInfo{Version: "v0.1.0"}) + default: + w.WriteHeader(404) + } + })) + defer proxy.Close() + + reg := New(proxy.URL, core.DefaultClient()) + reg.pkgsiteURL = pkgsite.URL + + versions, err := reg.FetchVersions(context.Background(), "example.com/fresh") + if err != nil { + t.Fatalf("expected fallthrough to proxy on pkgsite 404: %v", err) + } + if len(versions) != 1 || versions[0].Number != "v0.1.0" { + t.Errorf("expected proxy version v0.1.0, got %+v", versions) + } + + _, err = reg.FetchVersions(context.Background(), "example.com/missing") + if _, ok := err.(*core.NotFoundError); !ok { + t.Errorf("expected NotFoundError from proxy, got %T: %v", err, err) + } +} + func TestFetchDependencies(t *testing.T) { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { if r.URL.Path != "/github.com/gorilla/mux/@v/v1.8.0.mod" {