Files
opentf/internal/getmodules/getter.go
2025-05-09 10:16:38 +01:00

241 lines
9.6 KiB
Go

// Copyright (c) The OpenTofu Authors
// SPDX-License-Identifier: MPL-2.0
// Copyright (c) 2023 HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package getmodules
import (
"context"
"fmt"
"log"
"os"
"strings"
"sync"
getter "github.com/hashicorp/go-getter"
"github.com/opentofu/opentofu/internal/copy"
)
// goGetterDetectors is the fixed list of go-getter detectors this package
// uses when turning a user-written source address into a normalized one.
//
// We configure our own go-getter detector and getter sets here, because
// the set of sources we support is part of OpenTofu's documentation and
// so we don't want any new sources introduced in go-getter to sneak in here
// and work even though they aren't documented. This also insulates us from
// any meddling that might be done by other go-getter callers linked into our
// executable.
//
// Note that over time we've found go-getter's design to be not wholly fit
// for OpenTofu's purposes in various ways, and so we're continuing to use
// it here because our backward compatibility with earlier versions depends
// on it, but we use go-getter very carefully and always only indirectly via
// the public API of this package so that we can get the subset of the
// go-getter functionality we need while working around some of the less
// helpful parts of its design. See the comments in various other functions
// in this package which call into go-getter for more information on what
// tradeoffs we're making here.
//
// NOTE(review): go-getter presumably consults detectors in slice order and
// stops at the first match, in which case the ordering below is significant.
// Confirm against go-getter before reordering entries.
var goGetterDetectors = []getter.Detector{
// The GitHub detector is wrapped so that any query string on the source is
// set aside before detection and re-attached to the detected address
// afterwards; see withoutQueryParams.
&withoutQueryParams{d: new(getter.GitHubDetector)},
new(getter.GitDetector),
// Because historically BitBucket supported both Git and Mercurial
// repositories but used the same repository URL syntax for both,
// this detector takes the unusual step of actually reaching out
// to the BitBucket API to recognize the repository type. That
// means there's the possibility of an outgoing network request
// inside what is otherwise normally just a local string manipulation
// operation, but we continue to accept this for now.
//
// Perhaps a future version of go-getter will remove the check now
// that BitBucket only supports Git anyway. Aside from this historical
// exception, we should avoid adding any new detectors that make network
// requests in here, and limit ourselves only to ones that can operate
// entirely through local string manipulation.
new(getter.BitBucketDetector),
new(getter.GCSDetector),
new(getter.S3Detector),
// fileDetector is this package's own detector type rather than one of
// go-getter's.
new(fileDetector),
}
// goGetterNoDetectors is an empty detector list, passed to go-getter by
// getWithGoGetter because its callers are expected to have performed
// detection already.
//
// NOTE(review): this is an empty but non-nil slice. go-getter's Client
// appears to substitute its own default detectors when the Detectors field
// is nil, so keep this non-nil unless that has been confirmed otherwise.
var goGetterNoDetectors = make([]getter.Detector, 0)
// goGetterDecompressors is the table of decompressors handed to go-getter
// for every fetch performed by getWithGoGetter. As with the detector and
// getter tables, it is spelled out explicitly so that the supported set is
// decided in this package rather than by go-getter's defaults.
//
// Each key gets its own decompressor instance, even where two keys name the
// same format.
var goGetterDecompressors = map[string]getter.Decompressor{
	// Non-tar formats.
	"bz2": &getter.Bzip2Decompressor{},
	"gz":  &getter.GzipDecompressor{},
	"xz":  &getter.XzDecompressor{},
	"zip": &getter.ZipDecompressor{},

	// Tar-based formats; several of these are reachable under two keys.
	"tar.bz2":  &getter.TarBzip2Decompressor{},
	"tar.tbz2": &getter.TarBzip2Decompressor{},
	"tar.gz":   &getter.TarGzipDecompressor{},
	"tgz":      &getter.TarGzipDecompressor{},
	"tar.xz":   &getter.TarXzDecompressor{},
	"txz":      &getter.TarXzDecompressor{},
}
// goGetterDecompressorMediaTypes maps media types, as they appear in OCI
// descriptors, to the key in [goGetterDecompressors] whose decompressor
// handles that type.
//
// Only the archive formats we accept for module packages fetched from OCI
// distribution repositories are listed. The table lives in this file, next
// to goGetterDecompressors, so the two are easy to keep in step when new
// entries are added.
//
// Any key added here in future must also be mentioned somewhere in
// [ociBlobMediaTypePreference].
var goGetterDecompressorMediaTypes = map[string]string{"archive/zip": "zip"}
// goGetterGetters is the template table of getters from which the _real_
// table, the one given to a [reusingGetter], is built.
//
// Every supported getter key appears here so that the complete list can be
// read in one place. Keys mapped to nil are placeholders only: their getters
// are created dynamically from the arguments to [NewPackageFetcher].
var goGetterGetters = map[string]getter.Getter{
	// Getters that are usable exactly as constructed here.
	"file": &getter.FileGetter{},
	"gcs":  &getter.GCSGetter{},
	"git":  &getter.GitGetter{},
	"hg":   &getter.HgGetter{},
	"s3":   &getter.S3Getter{},

	// Placeholders that are filled in dynamically.
	"http":  nil, // configured dynamically in NewPackageFetcher
	"https": nil, // configured dynamically in NewPackageFetcher
	"oci":   nil, // configured dynamically using [PackageFetcherEnvironment.OCIRepositoryStore]
}
// A reusingGetter is a helper for the module installer that remembers
// the final resolved addresses of all of the sources it has already been
// asked to install, and will copy from a prior installation directory if
// it has the same resolved source address.
//
// Because it contains a mutex, a reusingGetter must not be copied once it is
// in use; [newReusingGetter] returns a pointer for that reason.
type reusingGetter struct {
// getters are the go-getter getters that this particular instance of
// reusingGetter should use.
getters map[string]getter.Getter
// previousInstalls records what has already been installed through this
// reusingGetter. Its keys are the normalized (post-detection) package
// addresses, and its values are the paths where each package was
// previously installed. (Users of this map should treat the keys as
// addrs.ModulePackage values, but we can't type them that way because the
// addrs package imports getmodules in order to indirectly access our
// go-getter configuration.)
previousInstalls map[string]string // nil until the first install request
previousInstallsMu sync.Mutex // must hold while interacting with previousInstalls
}
// newReusingGetter returns a reusingGetter that fetches using the given
// table of go-getter getters. The table is stored as given, without copying.
//
// The record of previous installations is intentionally left nil here; it is
// allocated by the first call to getWithGoGetter.
func newReusingGetter(getters map[string]getter.Getter) *reusingGetter {
	g := new(reusingGetter)
	g.getters = getters
	return g
}
// getWithGoGetter installs the package at packageAddr into the directory
// instPath. packageAddr must already be in normalized form (as produced by
// NormalizePackageAddress); behavior is undefined otherwise.
//
// The first request for a given address is fetched through go-getter and its
// destination is remembered. Later requests for the same address create
// instPath and copy the remembered directory into it instead of fetching
// again.
//
// Only whole packages are handled here. Selecting a subdirectory of the
// installed package, if the original source address asked for one, is the
// caller's job once this function has returned successfully.
//
// packageAddr would ideally be an addrs.ModulePackage, but the addrs package
// depends on this package for address parsing, so a string is used instead.
// Callers are expected to pass the result of String on a valid
// addrs.ModulePackage.
//
// Errors from a fetch are returned exactly as go-getter produced them. Their
// quality as end-user-actionable messages is very inconsistent, but they
// cannot be told apart well enough at this layer for us to improve on them.
func (g *reusingGetter) getWithGoGetter(ctx context.Context, instPath, packageAddr string) error {
	// The mutex is held for the whole call, fetch included. That is fine
	// while nothing uses a single reusingGetter concurrently. If more
	// concurrency is wanted later, only attempts to install the _same_
	// package should be serialized, and that will need a more elaborate
	// scheme than this one.
	//
	// NOTE WELL: [getter.Client.Get] modifies internal state inside each of
	// the getters passed in [getter.Client.Getters] before calling into
	// them, so sharing the same getter instances between concurrent calls is
	// _not_ safe. Concurrent use would require instantiating the getters on
	// demand for each request.
	g.previousInstallsMu.Lock()
	defer g.previousInstallsMu.Unlock()

	if g.previousInstalls == nil {
		g.previousInstalls = make(map[string]string)
	}

	prevDir, seenBefore := g.previousInstalls[packageAddr]
	if seenBefore {
		// Already fetched once: duplicate that tree rather than downloading.
		log.Printf("[TRACE] getmodules: copying previous install of %q from %s to %s", packageAddr, prevDir, instPath)
		if err := os.Mkdir(instPath, os.ModePerm); err != nil {
			return fmt.Errorf("failed to create directory %s: %w", instPath, err)
		}
		if err := copy.CopyDir(instPath, prevDir); err != nil {
			return fmt.Errorf("failed to copy from %s to %s: %w", prevDir, instPath, err)
		}
		return nil
	}

	log.Printf("[TRACE] getmodules: fetching %q to %q", packageAddr, instPath)
	client := getter.Client{
		Ctx:           ctx,
		Src:           packageAddr,
		Dst:           instPath,
		Pwd:           instPath,
		Mode:          getter.ClientModeDir,
		Detectors:     goGetterNoDetectors, // our caller should've already done detection
		Decompressors: goGetterDecompressors,
		Getters:       g.getters,
	}
	if err := client.Get(); err != nil {
		return err
	}

	// Record the destination so that later requests for the same package
	// can be satisfied by copying from it. Whichever path we took, instPath
	// should now hold the full module package structure.
	g.previousInstalls[packageAddr] = instPath
	return nil
}
// withoutQueryParams implements getter.Detector by wrapping another detector.
// Its Detect method strips any query string ("?" and everything after it)
// from src before calling the wrapped detector, and then re-attaches that
// query string to the address the wrapped detector returned.
type withoutQueryParams struct {
// d is the wrapped detector, which only ever sees src without its query
// string.
d getter.Detector
}
// Detect implements getter.Detector. It splits src at its first "?", passes
// only the part before it to the wrapped detector, and appends "?" plus the
// remainder to whatever address that detector returns.
//
// Nothing is appended when the wrapped detector returns an empty address or
// when there was nothing after the "?". The ok and err results of the
// wrapped detector are returned unchanged.
func (w *withoutQueryParams) Detect(src string, pwd string) (string, bool, error) {
	// When src has no "?", Cut yields all of src as the bare address and an
	// empty query.
	bare, query, _ := strings.Cut(src, "?")

	detected, ok, err := w.d.Detect(bare, pwd)
	if detected != "" && query != "" {
		detected += "?" + query
	}
	return detected, ok, err
}