From 607d74c88214ee4ab29572e29a795928abe57786 Mon Sep 17 00:00:00 2001 From: Christian Mesh Date: Mon, 1 Dec 2025 11:28:52 -0500 Subject: [PATCH] Defer provider checksum and parallelize schema fetching (#2730) Signed-off-by: Christian Mesh --- CHANGELOG.md | 1 + .../command/e2etest/providers_tamper_test.go | 8 +- internal/command/meta_providers.go | 81 ++++++------ internal/tofu/schemas.go | 123 +++++++++--------- 4 files changed, 112 insertions(+), 101 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 74e3cb5f7f..c83075b206 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ ENHANCEMENTS: - `prevent_destroy` arguments in the `lifecycle` block for managed resources can now use references to other symbols in the same module, such as to a module's input variables. ([#3474](https://github.com/opentofu/opentofu/issues/3474), [#3507](https://github.com/opentofu/opentofu/issues/3507)) - OpenTofu now uses the `BROWSER` environment variable when launching a web browser on Unix platforms, as long as it's set to a single command that can accept a URL to open as its first and only argument. ([#3456](https://github.com/opentofu/opentofu/issues/3456)) +- Improve performance around provider checking and schema management. ([#2730](https://github.com/opentofu/opentofu/pull/2730)) BUG FIXES: diff --git a/internal/command/e2etest/providers_tamper_test.go b/internal/command/e2etest/providers_tamper_test.go index 9baa856b57..b6aa3c9422 100644 --- a/internal/command/e2etest/providers_tamper_test.go +++ b/internal/command/e2etest/providers_tamper_test.go @@ -73,7 +73,7 @@ func TestProviderTampering(t *testing.T) { if err == nil { t.Fatalf("unexpected plan success\nstdout:\n%s", stdout) } - if want := `registry.opentofu.org/hashicorp/null: there is no package for registry.opentofu.org/hashicorp/null 3.1.0 cached in ` + providerCacheDir; !strings.Contains(stderr, want) { + if want := `there is no package for registry.opentofu.org/hashicorp/null 3.1.0 cached in ` + providerCacheDir; !strings.Contains(SanitizeStderr(stderr), want) { t.Errorf("missing expected error message\nwant substring: %s\ngot:\n%s", want, stderr) } if want := `tofu init`; !strings.Contains(stderr, want) { @@ -138,7 +138,7 @@ func TestProviderTampering(t *testing.T) { if err == nil { t.Fatalf("unexpected plan success\nstdout:\n%s", stdout) } - if want := `registry.opentofu.org/hashicorp/null: the cached package for registry.opentofu.org/hashicorp/null 3.1.0 (in ` + providerCacheDir + `) does not match any of the checksums recorded in the dependency lock file`; !strings.Contains(stderr, want) { + if want := `the cached package for registry.opentofu.org/hashicorp/null 3.1.0 (in ` + providerCacheDir + `) does not match any of the checksums recorded in the dependency lock file`; !strings.Contains(SanitizeStderr(stderr), want) { t.Errorf("missing expected error message\nwant substring: %s\ngot:\n%s", want, stderr) } if want := `tofu init`; !strings.Contains(stderr, want) { @@ -243,7 +243,7 @@ func TestProviderTampering(t *testing.T) { if err == nil { t.Fatalf("unexpected apply success\nstdout:\n%s", stdout) } - if want := `registry.opentofu.org/hashicorp/null: there is no package for registry.opentofu.org/hashicorp/null 3.1.0 cached in ` + providerCacheDir; !strings.Contains(stderr, want) { + if want := `there is no package for registry.opentofu.org/hashicorp/null 3.1.0 cached in ` + providerCacheDir; !strings.Contains(SanitizeStderr(stderr), want) { t.Errorf("missing expected error message\nwant substring: %s\ngot:\n%s", want, stderr) } }) @@ -265,7 +265,7 @@ func TestProviderTampering(t *testing.T) { if err == nil { t.Fatalf("unexpected apply success\nstdout:\n%s", stdout) } - if want := `registry.opentofu.org/hashicorp/null: the cached package for registry.opentofu.org/hashicorp/null 3.1.0 (in ` + providerCacheDir + `) does not match any of the checksums recorded in the dependency lock file`; !strings.Contains(stderr, want) { + if want := `the cached package for registry.opentofu.org/hashicorp/null 3.1.0 (in ` + providerCacheDir + `) does not match any of the checksums recorded in the dependency lock file`; !strings.Contains(SanitizeStderr(stderr), want) { t.Errorf("missing expected error message\nwant substring: %s\ngot:\n%s", want, stderr) } }) diff --git a/internal/command/meta_providers.go b/internal/command/meta_providers.go index 004083ef2f..6e43c00bef 100644 --- a/internal/command/meta_providers.go +++ b/internal/command/meta_providers.go @@ -14,6 +14,7 @@ import ( "os" "os/exec" "strings" + "sync" plugin "github.com/hashicorp/go-plugin" @@ -281,15 +282,6 @@ func (m *Meta) providerFactories() (map[addrs.Provider]providers.Factory, error) factories[addrs.NewBuiltInProvider(name)] = factory } for provider, lock := range providerLocks { - reportError := func(thisErr error) { - errs[provider] = thisErr - // We'll populate a provider factory that just echoes our error - // again if called, which allows us to still report a helpful - // error even if it gets detected downstream somewhere from the - // caller using our partial result. - factories[provider] = providerFactoryError(thisErr) - } - if locks.ProviderIsOverridden(provider) { // Overridden providers we'll handle with the other separate // loops below, for dev overrides etc. @@ -298,33 +290,54 @@ func (m *Meta) providerFactories() (map[addrs.Provider]providers.Factory, error) version := lock.Version() cached := cacheDir.ProviderVersion(provider, version) + if cached == nil { - reportError(fmt.Errorf( + errs[provider] = fmt.Errorf( "there is no package for %s %s cached in %s", provider, version, cacheDir.BasePath(), - )) + ) continue } - // The cached package must match one of the checksums recorded in - // the lock file, if any. - if allowedHashes := lock.PreferredHashes(); len(allowedHashes) != 0 { - matched, err := cached.MatchesAnyHash(allowedHashes) - if err != nil { - reportError(fmt.Errorf( - "failed to verify checksum of %s %s package cached in in %s: %w", - provider, version, cacheDir.BasePath(), err, - )) - continue - } - if !matched { - reportError(fmt.Errorf( - "the cached package for %s %s (in %s) does not match any of the checksums recorded in the dependency lock file", - provider, version, cacheDir.BasePath(), - )) - continue + + checkProvider := func() error { + // The cached package must match one of the checksums recorded in + // the lock file, if any. + if allowedHashes := lock.PreferredHashes(); len(allowedHashes) != 0 { + matched, err := cached.MatchesAnyHash(allowedHashes) + if err != nil { + return fmt.Errorf( + "failed to verify checksum of %s %s package cached in in %s: %w", + provider, version, cacheDir.BasePath(), err, + ) + } + if !matched { + return fmt.Errorf( + "the cached package for %s %s (in %s) does not match any of the checksums recorded in the dependency lock file, run tofu init to ensure all providers are correctly installed", + provider, version, cacheDir.BasePath(), + ) + } } + return nil + } + + var checkLock sync.Mutex + checkedProvider := false + var checkErr error + + factories[provider] = func() (providers.Interface, error) { + checkLock.Lock() + if !checkedProvider { + checkedProvider = true + checkErr = checkProvider() + } + checkLock.Unlock() + + if checkErr != nil { + return nil, checkErr + } + + return providerFactory(cached)() } - factories[provider] = providerFactory(cached) } for provider, localDir := range devOverrideProviders { factories[provider] = devOverrideProviderFactory(provider, localDir) @@ -481,16 +494,6 @@ func unmanagedProviderFactory(provider addrs.Provider, reattach *plugin.Reattach } } -// providerFactoryError is a stub providers.Factory that returns an error -// when called. It's used to allow providerFactories to still produce a -// factory for each available provider in an error case, for situations -// where the caller can do something useful with that partial result. -func providerFactoryError(err error) providers.Factory { - return func() (providers.Interface, error) { - return nil, err - } -} - // providerPluginErrors is an error implementation we can return from // Meta.providerFactories to capture potentially multiple errors about the // locally-cached plugins (or lack thereof) for particular external providers. diff --git a/internal/tofu/schemas.go b/internal/tofu/schemas.go index 1d0ffd8a0f..dc16690679 100644 --- a/internal/tofu/schemas.go +++ b/internal/tofu/schemas.go @@ -9,6 +9,7 @@ import ( "context" "fmt" "log" + "sync" "github.com/opentofu/opentofu/internal/addrs" "github.com/opentofu/opentofu/internal/configs" @@ -71,71 +72,92 @@ func (ss *Schemas) ProvisionerConfig(name string) *configschema.Block { // protocol itself. When returned with errors, the returned schemas object is // still valid but may be incomplete. func loadSchemas(ctx context.Context, config *configs.Config, state *states.State, plugins *contextPlugins) (*Schemas, error) { - schemas := &Schemas{ - Providers: map[addrs.Provider]providers.ProviderSchema{}, - Provisioners: map[string]*configschema.Block{}, - } var diags tfdiags.Diagnostics - provisionerDiags := loadProvisionerSchemas(ctx, schemas.Provisioners, config, plugins) + provisioners, provisionerDiags := loadProvisionerSchemas(ctx, config, plugins) diags = diags.Append(provisionerDiags) - providerDiags := loadProviderSchemas(ctx, schemas.Providers, config, state, plugins) + providers, providerDiags := loadProviderSchemas(ctx, config, state, plugins) diags = diags.Append(providerDiags) - return schemas, diags.Err() + return &Schemas{ + Providers: providers, + Provisioners: provisioners, + }, diags.Err() } -func loadProviderSchemas(ctx context.Context, schemas map[addrs.Provider]providers.ProviderSchema, config *configs.Config, state *states.State, plugins *contextPlugins) tfdiags.Diagnostics { +func loadProviderSchemas(ctx context.Context, config *configs.Config, state *states.State, plugins *contextPlugins) (map[addrs.Provider]providers.ProviderSchema, tfdiags.Diagnostics) { var diags tfdiags.Diagnostics - - ensure := func(fqn addrs.Provider) { - name := fqn.String() - - if _, exists := schemas[fqn]; exists { - return - } - - log.Printf("[TRACE] LoadSchemas: retrieving schema for provider type %q", name) - schema, err := plugins.ProviderSchema(ctx, fqn) - if err != nil { - // We'll put a stub in the map so we won't re-attempt this on - // future calls, which would then repeat the same error message - // multiple times. - schemas[fqn] = providers.ProviderSchema{} - diags = diags.Append( - tfdiags.Sourceless( - tfdiags.Error, - "Failed to obtain provider schema", - fmt.Sprintf("Could not load the schema for provider %s: %s.", fqn, err), - ), - ) - return - } - - schemas[fqn] = schema - } + schemas := map[addrs.Provider]providers.ProviderSchema{} if config != nil { for _, fqn := range config.ProviderTypes() { - ensure(fqn) + schemas[fqn] = providers.ProviderSchema{} } } if state != nil { needed := providers.AddressedTypesAbs(state.ProviderAddrs()) - for _, typeAddr := range needed { - ensure(typeAddr) + for _, fqn := range needed { + schemas[fqn] = providers.ProviderSchema{} } } - return diags + var wg sync.WaitGroup + var lock sync.Mutex + lock.Lock() // Prevent anything from started until we have finished schema map reads + for fqn := range schemas { + wg.Go(func() { + log.Printf("[TRACE] LoadSchemas: retrieving schema for provider type %q", fqn.String()) + schema, err := plugins.ProviderSchema(ctx, fqn) + + // Ensure that we don't race on diags or schemas now that the hard work is done + lock.Lock() + defer lock.Unlock() + + if err != nil { + diags = diags.Append(err) + return + } + + schemas[fqn] = schema + }) + } + + // Allow execution to start now that reading of schemas map has completed + lock.Unlock() + + // Wait for all of the scheduled routines to complete + wg.Wait() + + return schemas, diags } -func loadProvisionerSchemas(ctx context.Context, schemas map[string]*configschema.Block, config *configs.Config, plugins *contextPlugins) tfdiags.Diagnostics { +func loadProvisionerSchemas(ctx context.Context, config *configs.Config, plugins *contextPlugins) (map[string]*configschema.Block, tfdiags.Diagnostics) { var diags tfdiags.Diagnostics + schemas := map[string]*configschema.Block{} - ensure := func(name string) { + // Determine the full list of provisioners recursively + var addProvisionersToSchema func(config *configs.Config) + addProvisionersToSchema = func(config *configs.Config) { + if config == nil { + return + } + for _, rc := range config.Module.ManagedResources { + for _, pc := range rc.Managed.Provisioners { + schemas[pc.Type] = &configschema.Block{} + } + } + + // Must also visit our child modules, recursively. + for _, cc := range config.Children { + addProvisionersToSchema(cc) + } + } + addProvisionersToSchema(config) + + // Populate the schema entries + for name := range schemas { log.Printf("[TRACE] LoadSchemas: retrieving schema for provisioner %q", name) schema, err := plugins.ProvisionerSchema(name) if err != nil { @@ -150,26 +172,11 @@ func loadProvisionerSchemas(ctx context.Context, schemas map[string]*configschem fmt.Sprintf("Could not load the schema for provisioner %q: %s.", name, err), ), ) - - return + continue } schemas[name] = schema } - if config != nil { - for _, rc := range config.Module.ManagedResources { - for _, pc := range rc.Managed.Provisioners { - ensure(pc.Type) - } - } - - // Must also visit our child modules, recursively. - for _, cc := range config.Children { - childDiags := loadProvisionerSchemas(ctx, schemas, cc, plugins) - diags = diags.Append(childDiags) - } - } - - return diags + return schemas, diags }