diff --git a/internal/initwd/module_install.go b/internal/initwd/module_install.go index 6b282406f2..5378537a51 100644 --- a/internal/initwd/module_install.go +++ b/internal/initwd/module_install.go @@ -46,7 +46,7 @@ type ModuleInstaller struct { // The keys in moduleVersionsUrl are the moduleVersion struct below and // addresses and the values are underlying remote source addresses. - registryPackageSources map[moduleVersion]addrs.ModuleSourceRemote + registryPackageSources map[moduleVersion]registry.PackageLocation } type moduleVersion struct { @@ -79,7 +79,7 @@ func NewModuleInstaller(modsDir string, loader *configload.Loader, registryClien reg: registryClient, fetcher: remotePackageFetcher, registryPackageVersions: make(map[addrs.ModuleRegistryPackage]*response.ModuleVersions), - registryPackageSources: make(map[moduleVersion]addrs.ModuleSourceRemote), + registryPackageSources: make(map[moduleVersion]registry.PackageLocation), } } @@ -751,7 +751,7 @@ func (i *ModuleInstaller) installRegistryModule(ctx context.Context, req *config // first check the cache for the download URL moduleAddr := moduleVersion{module: packageAddr, version: latestMatch.String()} if _, exists := i.registryPackageSources[moduleAddr]; !exists { - realAddrRaw, err := reg.ModuleLocation(ctx, regsrcAddr, latestMatch.String()) + packageLocation, err := reg.ModuleLocation(ctx, regsrcAddr, latestMatch.String()) if err != nil { log.Printf("[ERROR] %s from %s %s: %s", key, addr, latestMatch, err) diags = diags.Append(&hcl.Diagnostic{ @@ -762,42 +762,42 @@ func (i *ModuleInstaller) installRegistryModule(ctx context.Context, req *config tracing.SetSpanError(span, diags) return nil, nil, diags } - realAddr, err := addrs.ParseModuleSource(realAddrRaw) - if err != nil { - diags = diags.Append(&hcl.Diagnostic{ - Severity: hcl.DiagError, - Summary: "Invalid package location from module registry", - Detail: fmt.Sprintf("Module registry %s returned invalid source location %q for %s %s: %s.", hostname, 
realAddrRaw, addr, latestMatch, err), - }) - tracing.SetSpanError(span, diags) - return nil, nil, diags - } - - span.SetAttributes(traceattrs.OpenTofuModuleSource(realAddr.String())) - - switch realAddr := realAddr.(type) { - // Only a remote source address is allowed here: a registry isn't - // allowed to return a local path (because it doesn't know what - // its being called from) and we also don't allow recursively pointing - // at another registry source for simplicity's sake. - case addrs.ModuleSourceRemote: - i.registryPackageSources[moduleAddr] = realAddr - default: - diags = diags.Append(&hcl.Diagnostic{ - Severity: hcl.DiagError, - Summary: "Invalid package location from module registry", - Detail: fmt.Sprintf("Module registry %s returned invalid source location %q for %s %s: must be a direct remote package address.", hostname, realAddrRaw, addr, latestMatch), - }) - tracing.SetSpanError(span, diags) - return nil, nil, diags - } + span.SetAttributes(traceattrs.OpenTofuModuleSource(packageLocation.UILabel())) + i.registryPackageSources[moduleAddr] = packageLocation } - dlAddr := i.registryPackageSources[moduleAddr] + packageLocation := i.registryPackageSources[moduleAddr] - log.Printf("[TRACE] ModuleInstaller: %s %s %s is available at %q", key, packageAddr, latestMatch, dlAddr.Package) - - err := fetcher.FetchPackage(ctx, instPath, dlAddr.Package.String()) + log.Printf("[TRACE] ModuleInstaller: %s %s %s is available at %q", key, packageAddr, latestMatch, packageLocation.UILabel()) + var err error // populated in the cases below + modDir := instPath // possibly overwritten below if the module is in a subdirectory of the package + switch packageLocation := packageLocation.(type) { + case registry.PackageLocationDirect: + // Direct locations are handled by the same registry client that + // returned them, since the download might require using equivalent + // credentials as were used to decide the location. 
modDir is the + // directory where the requested module was installed, which might + // be a subdirectory of instPath. + modDir, err = reg.InstallModulePackage(ctx, packageLocation, instPath) + case registry.PackageLocationIndirect: + // Indirect locations are handled by the package fetcher, similar to + // if the same address had been specified directly in the "source" + // argument of the module call. + err = fetcher.FetchPackage(ctx, instPath, packageLocation.SourceAddr.Package.String()) + if packageLocation.SourceAddr.Subdir != "" { + subDir := filepath.FromSlash(packageLocation.SourceAddr.Subdir) + modDir = filepath.Join(modDir, subDir) + } + default: + // The above cases should be exhaustive for all of the implementations + // of registry.PackageLocation, so we should not get here. + diags = diags.Append(&hcl.Diagnostic{ + Severity: hcl.DiagError, + Summary: "Unsupported package location", + Detail: fmt.Sprintf("Registry client returned a package location of type %T, which the module installer doesn't support. 
This is a bug in OpenTofu.", packageLocation), + }) + return nil, nil, diags + } if errors.Is(err, context.Canceled) { diags = diags.Append(&hcl.Diagnostic{ Severity: hcl.DiagError, @@ -815,27 +815,19 @@ func (i *ModuleInstaller) installRegistryModule(ctx context.Context, req *config diags = diags.Append(&hcl.Diagnostic{ Severity: hcl.DiagError, Summary: "Failed to download module", - Detail: fmt.Sprintf("Could not download module %q (%s:%d) source code from %q: %s.", req.Name, req.CallRange.Filename, req.CallRange.Start.Line, dlAddr, err), + Detail: fmt.Sprintf("Could not download module %q (%s:%d) source code from %q: %s.", req.Name, req.CallRange.Filename, req.CallRange.Start.Line, packageLocation, err), Subject: req.CallRange.Ptr(), }) return nil, nil, diags } - log.Printf("[TRACE] ModuleInstaller: %s %q was downloaded to %s", key, dlAddr.Package, instPath) - - // Incorporate any subdir information from the original path into the - // address returned by the registry in order to find the final directory - // of the target module. - finalAddr := dlAddr.FromRegistry(addr) - subDir := filepath.FromSlash(finalAddr.Subdir) - modDir := filepath.Join(instPath, subDir) - - log.Printf("[TRACE] ModuleInstaller: %s should now be at %s", key, modDir) + log.Printf("[TRACE] ModuleInstaller: %s %q was downloaded to %s", key, packageLocation.UILabel(), modDir) // Finally we are ready to try actually loading the module. 
mod, mDiags := i.loader.Parser().LoadConfigDir(modDir, req.Call) if mod == nil { + subDir := packageLocation.Subdir() isMissingSubDir, missingDir := isSubDirNonExistent(modDir) // nil indicates missing or unreadable directory, so we'll // discard the returned diags and return a more specific diff --git a/internal/registry/client.go b/internal/registry/client.go index 14d3a46b55..0fa031db96 100644 --- a/internal/registry/client.go +++ b/internal/registry/client.go @@ -13,7 +13,10 @@ import ( "log" "net/http" "net/url" + "os" "path" + "path/filepath" + "strconv" "strings" "time" @@ -21,6 +24,7 @@ import ( "github.com/opentofu/svchost" "github.com/opentofu/svchost/disco" + "github.com/opentofu/opentofu/internal/addrs" "github.com/opentofu/opentofu/internal/httpclient" "github.com/opentofu/opentofu/internal/registry/regsrc" "github.com/opentofu/opentofu/internal/registry/response" @@ -158,9 +162,13 @@ func (c *Client) addRequestCreds(ctx context.Context, host svchost.Hostname, req } } -// ModuleLocation find the download location for a specific version module. -// This returns a string, because the final location may contain special go-getter syntax. -func (c *Client) ModuleLocation(ctx context.Context, module *regsrc.Module, version string) (string, error) { +// ModuleLocation find the package location for a specific module version. +// +// This returns one of the concrete implementations of the closed interface +// [PackageLocation], depending on what type of location the registry chooses +// to report. Refer to the documentation of those types for information on +// how each variant should be used to actually install the package. 
+func (c *Client) ModuleLocation(ctx context.Context, module *regsrc.Module, version string) (PackageLocation, error) { ctx, span := tracing.Tracer().Start(ctx, "Find Module Location", tracing.SpanAttributes( traceattrs.OpenTofuModuleCallName(module.RawName), traceattrs.OpenTofuModuleSource(module.Module()), @@ -170,12 +178,12 @@ func (c *Client) ModuleLocation(ctx context.Context, module *regsrc.Module, vers host, err := module.SvcHost() if err != nil { - return "", err + return nil, err } service, err := c.Discover(ctx, host, modulesServiceID) if err != nil { - return "", err + return nil, err } var p *url.URL @@ -185,7 +193,7 @@ func (c *Client) ModuleLocation(ctx context.Context, module *regsrc.Module, vers p, err = url.Parse(path.Join(module.Module(), version, "download")) } if err != nil { - return "", err + return nil, err } download := service.ResolveReference(p) @@ -193,7 +201,7 @@ func (c *Client) ModuleLocation(ctx context.Context, module *regsrc.Module, vers req, err := retryablehttp.NewRequestWithContext(ctx, "GET", download.String(), nil) if err != nil { - return "", err + return nil, err } req = req.WithContext(ctx) @@ -203,46 +211,163 @@ func (c *Client) ModuleLocation(ctx context.Context, module *regsrc.Module, vers resp, err := c.client.Do(req) if err != nil { - return "", err + return nil, err } defer resp.Body.Close() body, err := io.ReadAll(resp.Body) if err != nil { - return "", fmt.Errorf("error reading response body from registry: %w", err) + return nil, fmt.Errorf("error reading response body from registry: %w", err) } - var location string - switch resp.StatusCode { case http.StatusOK: var v response.ModuleLocationRegistryResp if err := json.Unmarshal(body, &v); err != nil { - return "", fmt.Errorf("module %q version %q failed to deserialize response body %s: %w", + return nil, fmt.Errorf("module %q version %q failed to deserialize response body %s: %w", module, version, body, err) } - location = v.Location + if v.UseRegistryCredentials == 
nil { + // The registry has not opted in to "direct" installation, so we + // assume that it wants the old-style "indirect" behavior where + // the registry is essentially just a lookup table for + // go-getter-style source addresses, in which case the registry + // isn't involved in the final download step at all. - // if the location is empty, we will fallback to the header - if location == "" { - location = resp.Header.Get(xTerraformGet) + if v.Location == "" { + // If the location is empty, we will fall back to the header. + // Note that this only works if the body contains valid JSON syntax. + // This was probably not actually the originally intended behavior, + // since this fallback was introduced to fix a regression in + // https://github.com/opentofu/opentofu/pull/2079 but that didn't + // _quite_ restore the original behavior of ignoring the body completely + // when using this header. Nonetheless, we're keeping this constraint + // to avoid churning this protocol further since registry + // implementers tend to want to support many OpenTofu versions + // at once and so having many different variations is harder + // to test. Those who want to do the legacy thing of using + // X-Terraform-Get should use a "204 No Content" status code if + // they can't provide valid JSON syntax in the body. + return preparePackageLocationIndirect(resp.Header.Get(xTerraformGet), module, download) + } + return preparePackageLocationIndirect(v.Location, module, download) } + // Otherwise, the registry has opted in to the new-style "direct" + // installation approach, where the registry returns a URL that's under + // its own control and we fetch from it directly instead of delegating + // to go-getter. 
+ return preparePackageLocationDirect(v.Location, module, download, bool(*v.UseRegistryCredentials)) + case http.StatusNoContent: // FALLBACK: set the found location from the header - location = resp.Header.Get(xTerraformGet) + return preparePackageLocationIndirect(resp.Header.Get(xTerraformGet), module, download) case http.StatusNotFound: - return "", fmt.Errorf("module %q version %q not found", module, version) + return nil, fmt.Errorf("module %q version %q not found", module, version) default: // anything else is an error: - return "", fmt.Errorf("error getting download location for %q: %s resp:%s", module, resp.Status, body) + return nil, fmt.Errorf("error getting download location for %q: %s resp:%s", module, resp.Status, body) + } +} + +// InstallModulePackage attempts to install a module package from the given +// location into the given target directory. +// +// This method is used only for "direct" package locations, where the registry +// is directly hosting packages in locations under its own control, and possibly +// authenticated using the registry's own credentials. If you have a +// [PackageLocationIndirect] instead then you must handle it separately using +// the "remote source address" installation process. +// +// If successful this returns the final path of the requested module, taking +// into account any subdirectory selection that was included in the original +// module request. If the original source address did not include a subdirectory +// portion then the result is just a normalized version of targetDir. 
+func (c *Client) InstallModulePackage(ctx context.Context, location PackageLocationDirect, targetDir string) (string, error) { + urlString := location.packageURL.String() + ctx, span := tracing.Tracer().Start(ctx, "Fetch Package", + tracing.SpanAttributes(traceattrs.URLFull(urlString)), + ) + defer span.End() + + req, err := retryablehttp.NewRequestWithContext(ctx, "GET", urlString, nil) + if err != nil { + return "", fmt.Errorf("preparing to download from %s: %w", urlString, err) } - if location == "" { - return "", fmt.Errorf("failed to get download URL for %q: %s resp:%s", module, resp.Status, body) + host, err := location.module.SvcHost() + if err != nil { + // We should not get here because location.module should be populated + // correctly by [Client.ModuleLocation]. + return "", fmt.Errorf("package location has invalid registry hostname: %w", err) + } + if location.useRegistryCredentials { + c.addRequestCreds(ctx, host, req.Request) + } + req.Header.Set(xTerraformVersion, tfVersion) + // We'll set some content negotiation headers just in case that helps + // someone using a general-purpose static HTTP server serve content + // compressed on the fly by the server. (We will actually tolerate more + // than what we report here, in the more common case where the server + // just returns whatever it has on disk without any transformation, + // but this is just a hint for some common choices.) + req.Header.Set("Accept", "application/zip, application/x-tar; *;q=0.1") + req.Header.Set("Accept-Encoding", "identity, gzip, *;q=0.1") + + // We first fetch the raw content at the URL into a temporary file, and + // then we can sniff what format it seems to be in so that we'll tolerate + // servers that aren't able to correctly populate Content-Type and other + // similar header fields (which is relatively common for static file + // servers used for serving large files like these; e.g. 
they sometimes + // report just "application/octet-stream", or misreport which compressor + // was used for a tar stream, etc.). + f, err := os.CreateTemp("", "opentofu-modpkg-") + if err != nil { + return "", fmt.Errorf("creating temporary file for module package: %w", err) + } + defer func() { + // We make a best-effort to proactively clean the temporary file, but + // if this fails we'll still let installation succeed and assume that + // an OS service will clean the temporary directory itself eventually. + _ = f.Close() + _ = os.Remove(f.Name()) + }() + resp, err := c.client.Do(req) + if err != nil { + return "", err // net/http includes method and URL in its errors automatically + } + defer resp.Body.Close() + + n, err := io.Copy(f, resp.Body) + if err != nil { + return "", fmt.Errorf("copying module package to temporary file: %w", err) + } + if wantN, err := strconv.ParseInt(resp.Header.Get("Content-Length"), 10, 64); err == nil { + // If the server told us how much data it was expecting to send then + // we'll make sure we got exactly that much data. 
+ if n != wantN { + return "", fmt.Errorf("server promised %d bytes, but returned %d bytes", wantN, n) + } + } + + err = extractModulePackage(f, targetDir) + if err != nil { + return "", fmt.Errorf("extracting package archive: %w", err) + } + modDir := targetDir + if location.module.RawSubmodule != "" { + subDir := filepath.FromSlash(location.module.RawSubmodule) + modDir = filepath.Join(modDir, subDir) + } + return modDir, nil +} + +func preparePackageLocationIndirect(realAddrRaw string, forModule *regsrc.Module, baseURL *url.URL) (PackageLocation, error) { + if realAddrRaw == "" { + return nil, fmt.Errorf("registry did not return a location for this package") } // If location looks like it's trying to be a relative URL, treat it as @@ -256,14 +381,64 @@ func (c *Client) ModuleLocation(ctx context.Context, module *regsrc.Module, vers // to be an absolute URL, but we are more liberal here because third-party // registry implementations may not "know" their own absolute URLs if // e.g. they are running behind a reverse proxy frontend, or such. 
- if strings.HasPrefix(location, "/") || strings.HasPrefix(location, "./") || strings.HasPrefix(location, "../") { - locationURL, err := url.Parse(location) + if strings.HasPrefix(realAddrRaw, "/") || strings.HasPrefix(realAddrRaw, "./") || strings.HasPrefix(realAddrRaw, "../") { + locationURL, err := url.Parse(realAddrRaw) if err != nil { - return "", fmt.Errorf("invalid relative URL for %q: %w", module, err) + return nil, fmt.Errorf("invalid relative URL %q: %w", realAddrRaw, err) } - locationURL = download.ResolveReference(locationURL) - location = locationURL.String() + locationURL = baseURL.ResolveReference(locationURL) + realAddrRaw = locationURL.String() } - return location, nil + realAddrAny, err := addrs.ParseModuleSource(realAddrRaw) + if err != nil { + return nil, fmt.Errorf( + "registry returned invalid package location %q: %w", + realAddrRaw, err, + ) + } + realAddr, ok := realAddrAny.(addrs.ModuleSourceRemote) + if !ok { + return nil, fmt.Errorf("registry returned invalid package location %q: must be a direct remote package address", realAddrRaw) + } + + // When we're installing indirectly it's possible that both the registry + // source address and the go-getter-style address returned from the registry + // include a "subdirectory" component, in which case we need to resolve + // the final effective subdirectory path that combines both. + realAddr = realAddr.FromRegistry( + // Unfortunately we have some tech debt here where this old registry + // client code uses some older conventions for representing module + // registry addresses, so we need to adapt this to the modern + // representation. 
+ forModule.AsModuleSourceRegistry(), + ) + + return PackageLocationIndirect{ + SourceAddr: realAddr, + }, nil +} + +func preparePackageLocationDirect(locationRaw string, originalAddr *regsrc.Module, baseURL *url.URL, useRegistryCredentials bool) (PackageLocation, error) { + packageURL, err := url.Parse(locationRaw) + if err != nil { + return nil, fmt.Errorf("registry returned an invalid package URL: %w", err) + } + + if !packageURL.IsAbs() { + // We resolve relative URLs against the URL we got the location from. + packageURL = baseURL.ResolveReference(packageURL) + } + if packageURL.Scheme != "http" && packageURL.Scheme != "https" { + return nil, fmt.Errorf("registry returned invalid package URL %q: must be http or https URL", locationRaw) + } + if packageURL.Fragment != "" { + return nil, fmt.Errorf("registry returned invalid package URL %q: must not include fragment part", locationRaw) + } + + return PackageLocationDirect{ + module: originalAddr, + packageURL: packageURL, + useRegistryCredentials: useRegistryCredentials, + }, nil } diff --git a/internal/registry/client_test.go b/internal/registry/client_test.go index 45ee81f5b7..3815b0c536 100644 --- a/internal/registry/client_test.go +++ b/internal/registry/client_test.go @@ -16,10 +16,12 @@ import ( "testing" "time" + "github.com/google/go-cmp/cmp" "github.com/hashicorp/go-retryablehttp" version "github.com/hashicorp/go-version" "github.com/opentofu/svchost/disco" + "github.com/opentofu/opentofu/internal/addrs" "github.com/opentofu/opentofu/internal/httpclient" "github.com/opentofu/opentofu/internal/registry/regsrc" "github.com/opentofu/opentofu/internal/registry/response" @@ -139,9 +141,13 @@ func TestLookupModuleLocationRelative(t *testing.T) { t.Fatal(err) } - want := server.URL + "/relative-path" - if got != want { - t.Errorf("wrong location %s; want %s", got, want) + want := PackageLocationIndirect{ + SourceAddr: addrs.ModuleSourceRemote{ + Package: addrs.ModulePackage(server.URL + "/relative-path"), + }, 
+ } + if diff := cmp.Diff(want, got); diff != "" { + t.Error("wrong location\n" + diff) } } @@ -309,28 +315,80 @@ func TestLookupModuleNetworkError(t *testing.T) { } func TestModuleLocation_readRegistryResponse(t *testing.T) { + makeIndirectLocation := func(packageAddr string, subDir string) PackageLocationIndirect { + return PackageLocationIndirect{ + SourceAddr: addrs.ModuleSourceRemote{ + Package: addrs.ModulePackage(packageAddr), + Subdir: subDir, + }, + } + } + mustParseURL := func(s string) *url.URL { + ret, err := url.Parse(s) + if err != nil { + t.Fatal(err) + } + return ret + } + cases := map[string]struct { src string handlerFunc func(w http.ResponseWriter, r *http.Request) registryFlags []uint8 - want string + want PackageLocation wantErrorStr string wantToReadFromHeader bool wantStatusCode int }{ - "shall find the module location in the registry response body": { + "shall find direct module location in the registry response body, opting to use the registry's credentials": { + src: "exists-in-registry/identifier/provider", + want: PackageLocationDirect{ + module: &regsrc.Module{ + RawHost: &regsrc.FriendlyHost{Raw: "registry.opentofu.org"}, + RawNamespace: "exists-in-registry", + RawName: "identifier", + RawProvider: "provider", + }, + packageURL: mustParseURL("https://example.com/package.zip"), + useRegistryCredentials: true, + }, + wantStatusCode: http.StatusOK, + handlerFunc: func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"location":"https://example.com/package.zip","use_registry_credentials":true}`)) + }, + }, + "shall find direct module location in the registry response body, not opting to use the registry's credentials": { + src: "exists-in-registry/identifier/provider", + want: PackageLocationDirect{ + module: &regsrc.Module{ + RawHost: &regsrc.FriendlyHost{Raw: "registry.opentofu.org"}, + RawNamespace: "exists-in-registry", + RawName: "identifier", + RawProvider: "provider", + }, + packageURL: 
mustParseURL("https://example.com/package.zip"), + useRegistryCredentials: false, + }, + wantStatusCode: http.StatusOK, + handlerFunc: func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"location":"https://example.com/package.zip","use_registry_credentials":false}`)) + }, + }, + "shall find indirect module location in the registry response body": { src: "exists-in-registry/identifier/provider", - want: "file:///registry/exists", + want: makeIndirectLocation("file:///registry/exists", ""), wantStatusCode: http.StatusOK, handlerFunc: func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) _ = json.NewEncoder(w).Encode(response.ModuleLocationRegistryResp{Location: "file:///registry/exists"}) }, }, - "shall find the module location in the registry response header": { + "shall find indirect module location in the registry response header": { src: "exists-in-registry/identifier/provider", registryFlags: []uint8{test.WithModuleLocationInHeader}, - want: "file:///registry/exists", + want: makeIndirectLocation("file:///registry/exists", ""), wantToReadFromHeader: true, wantStatusCode: http.StatusNoContent, handlerFunc: func(w http.ResponseWriter, r *http.Request) { @@ -338,9 +396,9 @@ func TestModuleLocation_readRegistryResponse(t *testing.T) { w.WriteHeader(http.StatusNoContent) }, }, - "shall read location from the registry response body even if the header with location address is also set": { + "shall read indirect location from the registry response body even if the header with location address is also set": { src: "exists-in-registry/identifier/provider", - want: "file:///registry/exists", + want: makeIndirectLocation("file:///registry/exists", ""), wantStatusCode: http.StatusOK, wantToReadFromHeader: false, registryFlags: []uint8{test.WithModuleLocationInBody, test.WithModuleLocationInHeader}, @@ -391,7 +449,7 @@ func TestModuleLocation_readRegistryResponse(t *testing.T) { }, "shall fail because 
location is not found in the response": { src: "foo/bar/baz", - wantErrorStr: `failed to get download URL for "foo/bar/baz": 200 OK resp:{"foo":"git::https://github.com/foo/terraform-baz-bar?ref=v0.2.0"}`, + wantErrorStr: `registry did not return a location for this package`, wantStatusCode: http.StatusOK, handlerFunc: func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) @@ -431,8 +489,8 @@ func TestModuleLocation_readRegistryResponse(t *testing.T) { if err != nil && !strings.Contains(err.Error(), tc.wantErrorStr) { t.Fatalf("unexpected error content: want=%s, got=%v", tc.wantErrorStr, err) } - if got != tc.want { - t.Fatalf("unexpected location: want=%s, got=%v", tc.want, got) + if diff := cmp.Diff(tc.want, got, cmp.AllowUnexported(PackageLocationDirect{})); diff != "" { + t.Fatal("unexpected location\n" + diff) } // Verify status code if we have a successful response diff --git a/internal/registry/package_extract.go b/internal/registry/package_extract.go new file mode 100644 index 0000000000..f3dab82731 --- /dev/null +++ b/internal/registry/package_extract.go @@ -0,0 +1,122 @@ +// Copyright (c) The OpenTofu Authors +// SPDX-License-Identifier: MPL-2.0 +// Copyright (c) 2023 HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +package registry + +import ( + "archive/zip" + "compress/gzip" + "fmt" + "io" + "os" + + "github.com/hashicorp/go-getter" + "github.com/ulikunitz/xz" +) + +func extractModulePackage(tempF *os.File, targetDir string) error { + // We reuse go-getter's decompressors for the actual extraction, because + // they are already hardened against a number of previously-discovered + // attacks involving crafted archives, and if any similar problems are + // discovered later then upgrading go-getter will patch both this and + // the go-getter-based module installation path. 
+ decompressor, err := sniffPackageDecompressor(tempF) + if err != nil { + return err + } + + // getter.Decompressor wants to work with filenames rather than open + // files, so we need to pass it the path to our temporary file now. + // Note that this could potentially race if another process modifies + // or removes the file before the decompressor opens it, but the + // decompressors should all be robust to malicious input anyway. + return decompressor.Decompress(targetDir, tempF.Name(), true, 0 /*default umask*/) +} + +var packageDecompressSniffers = []func(*os.File, int64) getter.Decompressor{ + func(f *os.File, size int64) getter.Decompressor { + // zip.NewReader succeeds only if the file has a zip header + _, err := zip.NewReader(f, size) + if err != nil { + return nil + } + return &getter.ZipDecompressor{} + }, + func(f *os.File, _ int64) getter.Decompressor { + // gzip.NewReader succeeds only if the file has a gzip header + _, err := gzip.NewReader(f) + if err != nil { + return nil + } + // Tar archives don't have a header, so we just assume that any + // gzip stream is intended to contain a tar stream. + return &getter.TarGzipDecompressor{} + }, + func(f *os.File, _ int64) getter.Decompressor { + buf := make([]byte, xz.HeaderLen) + n, err := f.Read(buf) + if err != nil || n != len(buf) { + return nil // not able to read an xz header + } + if !xz.ValidHeader(buf) { + return nil + } + // Tar archives don't have a header, so we just assume that any + // xz stream is intended to contain a tar stream. + return &getter.TarXzDecompressor{} + }, + func(f *os.File, _ int64) getter.Decompressor { + // encoding/bzip2 doesn't offer a direct way to ask if a stream + // has a valid bzip2 header, so we'll check it manually by looking + // for the "BZ" magic number at the very start. This sniffer is + // intentionally last because it's doing the least checking and + // so is most likely to generate false positives. 
+ // (The go-getter decompressor we return will check this more + // thoroughly; our job here is just to decide if it seems likely + // that this was intended to be a bzip2 stream.) + + // We're reading four bytes here because a file smaller than that + // cannot possibly be a valid bzip2 stream. The last two bytes here + // are real header fields though, not part of the magic number. + buf := make([]byte, 4) + n, err := f.Read(buf) + if err != nil || n != len(buf) { + return nil // not able to read a magic number + } + if buf[0] != 'B' || buf[1] != 'Z' { + return nil // not the magic number we were looking for + } + // Tar archives don't have a header, so we just assume that any + // bzip2 stream is intended to contain a tar stream. + return &getter.TarBzip2Decompressor{} + }, +} + +func sniffPackageDecompressor(tempF *os.File) (getter.Decompressor, error) { + // Our approach here is just to try opening the file in a few different + // ways where success implies a file was probably intended to be of + // a particular format, but once we've decided we'll just let the real + // decompressor do the actual validation of the package. + + info, err := tempF.Stat() + if err != nil { + return nil, err // Error message already mentions it was trying to stat + } + fileSize := info.Size() + for _, sniffer := range packageDecompressSniffers { + _, err := tempF.Seek(0, io.SeekStart) + if err != nil { + // Should not get here because the caller should always give us + // a regular file. The error message from stdlib already mentions that + // it was trying to seek. + return nil, err + } + if ret := sniffer(tempF, fileSize); ret != nil { + return ret, nil + } + } + // If we fall out here then we weren't able to detect a supported format. 
+ return nil, fmt.Errorf("module package is not zip archive, or tar archive with gz, xz, or bzip2 compression") +} diff --git a/internal/registry/package_location.go b/internal/registry/package_location.go new file mode 100644 index 0000000000..7fc3751d42 --- /dev/null +++ b/internal/registry/package_location.go @@ -0,0 +1,125 @@ +// Copyright (c) The OpenTofu Authors +// SPDX-License-Identifier: MPL-2.0 +// Copyright (c) 2023 HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +package registry + +import ( + "net/url" + + "github.com/opentofu/opentofu/internal/addrs" + "github.com/opentofu/opentofu/internal/registry/regsrc" +) + +// PackageLocation abstractly represents a location that a module package should +// be installed from. +// +// There are exactly two concrete implementations of this interface: +// [PackageLocationDirect] for packages that are hosted as part of the +// registry they were reported from, and [PackageLocationIndirect] (for +// registry packages that are really just aliases for source addresses that +// someone could've specified directly in their configuration). +// +// This is a closed interface. If any new implementations of it are added in +// future then an exhaustive type-switch over these in the module installer +// will need to be updated to support the new variants. +type PackageLocation interface { + // UILabel returns a label that can be used to concisely refer to this + // location in the OpenTofu UI, such as when reporting progress or + // describing problems in error messages. + // + // The result is not necessarily a unique identifier for the location. It's + // just expected to be something a human reader could use to confirm whether + // OpenTofu is installing from somewhere reasonable and expected. + UILabel() string + + // Subdir returns a path to a directory within the package that contains + // the module that is being selected. Returns an empty string if the root + // of the package contains the selected module. 
+ Subdir() string + + // This unexported method means that only types within this package can + // implement this interface. + packageLocationSigil() +} + +// PackageLocationDirect represents a module package location that's considered +// to be a part of the registry that reported it, and so the same registry +// client that reported this location should also be used to install it. +// +// This type is intentionally opaque, since values of this type should be +// passed directly to [Client.InstallPackage] on the same Client instance +// that returned this value. It encapsulates everything that client would need +// to perform the installation. +type PackageLocationDirect struct { + // module is the registry module address that the package at this location + // is intended to satisfy. We track this so that the client can decide + // which credentials (if any) to use when requesting the package. + module *regsrc.Module + + // packageURL is the absolute HTTP or HTTPS URL where the module package + // is located. This URL should respond to a GET request by returning a + // successful response whose body is either a "zip" archive, or is a + // "tar" archive using either gz, xz, or bzip2 compression. + packageURL *url.URL + + // useRegistryCredentials records whether the registry directed OpenTofu + // to reuse the same credentials that were used to request this location + // (or, at least, functionally-equivalent credentials) when making a GET + // request to the URL given in packageURL. + // + // This is used for private registries that wish to protect both metadata + // and packages using the same credentials. 
If this is false then the + // request to packageURL uses no credentials at all and so that URL must + // either be willing to serve an anonymous request or some sort of + // credential information must be packed into the URL itself, such as if + // using a mechanism like AWS S3's "presigned URLs": + // https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-presigned-url.html + useRegistryCredentials bool +} + +var _ PackageLocation = PackageLocationDirect{} + +// UILabel implements PackageLocation. +func (p PackageLocationDirect) UILabel() string { + return p.packageURL.String() +} + +// Subdir implements PackageLocation. +func (p PackageLocationDirect) Subdir() string { + return p.module.RawSubmodule +} + +// packageLocationSigil implements PackageLocation. +func (p PackageLocationDirect) packageLocationSigil() {} + +// PackageLocationIndirect represents a module package that is accessible +// through a "remote" module source address just like what could be written +// directly in a "source" argument in a module call, and so must be installed +// through the normal remote package fetcher instead of through the registry +// client. +// +// For locations of this type, the registry client that produced it is no longer +// involved after the location has been decided. +type PackageLocationIndirect struct { + // SourceAddr is the remote source address to install from, which should + // be treated in an equivalent way to how this address would've been treated + // if specified directly in a module call's "source" argument. + SourceAddr addrs.ModuleSourceRemote +} + +var _ PackageLocation = PackageLocationIndirect{} + +// UILabel implements PackageLocation. +func (p PackageLocationIndirect) UILabel() string { + return p.SourceAddr.ForDisplay() +} + +// Subdir implements PackageLocation. +func (p PackageLocationIndirect) Subdir() string { + return p.SourceAddr.Subdir +} + +// packageLocationSigil implements PackageLocation. 
+func (p PackageLocationIndirect) packageLocationSigil() {} diff --git a/internal/registry/regsrc/module.go b/internal/registry/regsrc/module.go index 66fe456c98..0f6e78729f 100644 --- a/internal/registry/regsrc/module.go +++ b/internal/registry/regsrc/module.go @@ -11,6 +11,7 @@ import ( "regexp" "strings" + regaddr "github.com/opentofu/registry-address/v2" "github.com/opentofu/svchost" "github.com/opentofu/opentofu/internal/addrs" @@ -116,6 +117,25 @@ func ModuleFromModuleSourceAddr(addr addrs.ModuleSourceRegistry) *Module { return ret } +// AsModuleSourceRegistry translates this legacy representation of module +// registry addresses back into the modern model [addrs.ModuleSourceRegistry]. +// +// Normally [addrs.ModuleSourceRegistry] values are normalized during parsing, +// but this function doesn't actually do any parsing so its result is not +// guaranteed to be normalized unless the receiver was originally created +// with [ModuleFromModuleSourceAddr] and not modified in the meantime. +func (m *Module) AsModuleSourceRegistry() addrs.ModuleSourceRegistry { + return addrs.ModuleSourceRegistry{ + Package: regaddr.ModulePackage{ + Host: svchost.Hostname(m.Host().Normalized()), + Namespace: m.RawNamespace, + Name: m.RawName, + TargetSystem: m.RawProvider, // this field was never actually enforced to be a provider address, so now has a more general name + }, + Subdir: m.RawSubmodule, + } +} + // ModuleFromRegistryPackageAddr is similar to ModuleFromModuleSourceAddr, but // it works with just the isolated registry package address, and not the // full source address. 
diff --git a/internal/registry/response/module_download.go b/internal/registry/response/module_download.go index d1a44e1b8d..3b49ab2e77 100644 --- a/internal/registry/response/module_download.go +++ b/internal/registry/response/module_download.go @@ -3,9 +3,51 @@ package response +import ( + "bytes" + "fmt" +) + // ModuleLocationRegistryResp defines the OpenTofu registry response // returned when calling the endpoint /v1/modules/:namespace/:name/:system/:version/download type ModuleLocationRegistryResp struct { // The URL to download the module from. Location string `json:"location"` + + // If not nil, represents that the registry wishes to provide the module + // package directly itself instead of delegating to a separate + // go-getter-style source address. + // + // In that case, the registry can set either "true" to request that the + // final download request should use the same credentials used to fetch the + // download location, or "false" to request that the request should be + // made anonymously (e.g. if the URL already contains something that acts + // as authentication credentials). + UseRegistryCredentials *StrictBool `json:"use_registry_credentials"` +} + +// StrictBool is a named type representing a bool value that must be written +// in JSON as exactly "true" or "false". In particular, "null" is not permitted +// as an alias for "false", unlike the Go JSON package's default behavior. +// +// This is here really just to implement [json.Unmarshaler] for conveniently +// handling JSON properties that have this requirement. +type StrictBool bool + +func (b *StrictBool) UnmarshalJSON(src []byte) error { + // This method gets called only when the associated JSON property is + // actually present, and in that case gets called with a preallocated + // bool value that we need to overwrite based on the source. 
+ src = bytes.TrimSpace(src) + + // There are only two possible valid JSON boolean tokens, so we'll just + // handle them directly here for simplicity's sake. + if bytes.Equal(src, []byte{'t', 'r', 'u', 'e'}) { + *b = true + } else if bytes.Equal(src, []byte{'f', 'a', 'l', 's', 'e'}) { + *b = false + } else { + return fmt.Errorf("must be either true or false") + } + return nil }