Remove global provider schema cache

Signed-off-by: Christian Mesh <christianmesh1@gmail.com>
2025-12-19 17:59:05 -05:00 · 2025-12-11 11:26:57 -05:00
parent 271dac36a4
commit 82044d4362
5 changed files with 81 additions and 174 deletions
--- a/internal/command/meta_providers.go
+++ b/internal/command/meta_providers.go
@@ -361,10 +361,28 @@ func (m *Meta) internalProviders() map[string]providers.Factory {
 	}
 }

+func providerSchemaCache() func(func() providers.ProviderSchema) providers.ProviderSchema {
+	var mu sync.Mutex
+	var schema providers.ProviderSchema
+
+	return func(getSchema func() providers.ProviderSchema) providers.ProviderSchema {
+		mu.Lock()
+		defer mu.Unlock()
+
+		if schema.Provider.Block != nil {
+			return schema
+		}
+		schema = getSchema()
+		return schema
+	}
+}
+
 // providerFactory produces a provider factory that runs up the executable
 // file in the given cache package and uses go-plugin to implement
 // providers.Interface against it.
 func providerFactory(meta *providercache.CachedProvider) providers.Factory {
+	schemaCache := providerSchemaCache()
+
 	return func() (providers.Interface, error) {
 		execFile, err := meta.ExecutableFile()
 		if err != nil {
@@ -395,7 +413,7 @@ func providerFactory(meta *providercache.CachedProvider) providers.Factory {
 		}

 		protoVer := client.NegotiatedVersion()
-		p, err := initializeProviderInstance(raw, protoVer, client, meta.Provider)
+		p, err := initializeProviderInstance(raw, protoVer, client, schemaCache)
 		if errors.Is(err, errUnsupportedProtocolVersion) {
 			panic(err)
 		}
@@ -406,18 +424,18 @@ func providerFactory(meta *providercache.CachedProvider) providers.Factory {

 // initializeProviderInstance uses the plugin dispensed by the RPC client, and initializes a plugin instance
 // per the protocol version
-func initializeProviderInstance(plugin interface{}, protoVer int, pluginClient *plugin.Client, pluginAddr addrs.Provider) (providers.Interface, error) {
+func initializeProviderInstance(plugin interface{}, protoVer int, pluginClient *plugin.Client, schemaCache func(func() providers.ProviderSchema) providers.ProviderSchema) (providers.Interface, error) {
 	// store the client so that the plugin can kill the child process
 	switch protoVer {
 	case 5:
 		p := plugin.(*tfplugin.GRPCProvider)
 		p.PluginClient = pluginClient
-		p.Addr = pluginAddr
+		p.SchemaCache = schemaCache
 		return p, nil
 	case 6:
 		p := plugin.(*tfplugin6.GRPCProvider)
 		p.PluginClient = pluginClient
-		p.Addr = pluginAddr
+		p.SchemaCache = schemaCache
 		return p, nil
 	default:
 		return nil, errUnsupportedProtocolVersion
@@ -441,6 +459,8 @@ func devOverrideProviderFactory(provider addrs.Provider, localDir getproviders.P
 // reattach information to connect to go-plugin processes that are already
 // running, and implements providers.Interface against it.
 func unmanagedProviderFactory(provider addrs.Provider, reattach *plugin.ReattachConfig) providers.Factory {
+	schemaCache := providerSchemaCache()
+
 	return func() (providers.Interface, error) {
 		config := &plugin.ClientConfig{
 			HandshakeConfig:  tfplugin.Handshake,
@@ -490,7 +510,7 @@ func unmanagedProviderFactory(provider addrs.Provider, reattach *plugin.Reattach
 			protoVer = 5
 		}

-		return initializeProviderInstance(raw, protoVer, client, provider)
+		return initializeProviderInstance(raw, protoVer, client, schemaCache)
 	}
 }

--- a/internal/plugin/grpc_provider.go
+++ b/internal/plugin/grpc_provider.go
@@ -9,7 +9,6 @@ import (
 	"context"
 	"errors"
 	"fmt"
-	"sync"

 	plugin "github.com/hashicorp/go-plugin"
 	"github.com/opentofu/opentofu/internal/plugin/validation"
@@ -18,7 +17,6 @@ import (
 	"github.com/zclconf/go-cty/cty/msgpack"
 	"google.golang.org/grpc"

-	"github.com/opentofu/opentofu/internal/addrs"
 	"github.com/opentofu/opentofu/internal/logging"
 	"github.com/opentofu/opentofu/internal/plugin/convert"
 	"github.com/opentofu/opentofu/internal/providers"
@@ -27,6 +25,16 @@ import (

 var logger = logging.HCLogger()

+// Some providers may generate quite large schemas, and the internal default
+// grpc response size limit is 4MB. 64MB should cover most any use case, and
+// if we get providers nearing that we may want to consider a finer-grained
+// API to fetch individual resource schemas.
+// Note: this option is marked as EXPERIMENTAL in the grpc API. We keep
+// this for compatibility, but recent providers all set the max message
+// size much higher on the server side, which is the supported method for
+// determining payload size.
+const maxRecvSize = 64 << 20
+
 // GRPCProviderPlugin implements plugin.GRPCPlugin for the go-plugin package.
 type GRPCProviderPlugin struct {
 	plugin.Plugin
@@ -75,11 +83,6 @@ type GRPCProvider struct {
 	// used in an end to end test of a provider.
 	TestServer *grpc.Server

-	// Addr uniquely identifies the type of provider.
-	// Normally executed providers will have this set during initialization,
-	// but it may not always be available for alternative execute modes.
-	Addr addrs.Provider
-
 	// Proto client use to make the grpc service calls.
 	client proto.ProviderClient

@@ -94,53 +97,39 @@ type GRPCProvider struct {
 	// to use as the parent context for gRPC API calls.
 	ctx context.Context

-	mu sync.Mutex
-	// schema stores the schema for this provider. This is used to properly
-	// serialize the requests for schemas.
-	schema providers.GetProviderSchemaResponse
+	// SchemaCache returns the schema for this provider, calling the supplied
+	// function to fetch it when no cached copy exists. It serializes schema
+	// requests and is shared between instances of the provider.
+	SchemaCache      func(func() providers.GetProviderSchemaResponse) providers.GetProviderSchemaResponse
+	hasFetchedSchema bool
 }

 var _ providers.Interface = new(GRPCProvider)

 func (p *GRPCProvider) GetProviderSchema(ctx context.Context) (resp providers.GetProviderSchemaResponse) {
 	logger.Trace("GRPCProvider: GetProviderSchema")
-	p.mu.Lock()
-	defer p.mu.Unlock()

-	// First, we check the global cache.
-	// The cache could contain this schema if an instance of this provider has previously been started.
-	if !p.Addr.IsZero() {
-		// Even if the schema is cached, GetProviderSchemaOptional could be false. This would indicate that once instantiated,
-		// this provider requires the get schema call to be made at least once, as it handles part of the provider's setup.
-		// At this point, we don't know if this is the first call to a provider instance or not, so we don't use the result in that case.
-		if schemaCached, ok := providers.SchemaCache.Get(p.Addr); ok && schemaCached.ServerCapabilities.GetProviderSchemaOptional {
-			logger.Trace("GRPCProvider: GetProviderSchema: serving from global schema cache", "address", p.Addr)
-			return schemaCached
-		}
-	}
-
-	// If the local cache is non-zero, we know this instance has called
-	// GetProviderSchema at least once, so has satisfied the possible requirement of `GetProviderSchemaOptional=false`.
-	// This means that we can return early now using the locally cached schema, without making this call again.
-	if p.schema.Provider.Block != nil {
-		return p.schema
+	schema := p.SchemaCache(func() providers.GetProviderSchemaResponse {
+		return p.getProviderSchema(ctx)
+	})
+
+	if !p.hasFetchedSchema && !schema.ServerCapabilities.GetProviderSchemaOptional {
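+		// This instance has not requested the schema itself and the provider does not treat that request as optional, so issue it now; the response and error are discarded.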
+		p.client.GetSchema(ctx, new(proto.GetProviderSchema_Request), grpc.MaxRecvMsgSizeCallOption{MaxRecvMsgSize: maxRecvSize})
+		p.hasFetchedSchema = true
 	}

+	return schema
+}
+func (p *GRPCProvider) getProviderSchema(ctx context.Context) (resp providers.GetProviderSchemaResponse) {
 	resp.ResourceTypes = make(map[string]providers.Schema)
 	resp.DataSources = make(map[string]providers.Schema)
 	resp.EphemeralResources = make(map[string]providers.Schema)
 	resp.Functions = make(map[string]providers.FunctionSpec)

-	// Some providers may generate quite large schemas, and the internal default
-	// grpc response size limit is 4MB. 64MB should cover most any use case, and
-	// if we get providers nearing that we may want to consider a finer-grained
-	// API to fetch individual resource schemas.
-	// Note: this option is marked as EXPERIMENTAL in the grpc API. We keep
-	// this for compatibility, but recent providers all set the max message
-	// size much higher on the server side, which is the supported method for
-	// determining payload size.
-	const maxRecvSize = 64 << 20
 	protoResp, err := p.client.GetSchema(ctx, new(proto.GetProviderSchema_Request), grpc.MaxRecvMsgSizeCallOption{MaxRecvMsgSize: maxRecvSize})
+	p.hasFetchedSchema = true
+
 	if err != nil {
 		resp.Diagnostics = resp.Diagnostics.Append(grpcErr(err))
 		return resp
@@ -188,23 +177,6 @@ func (p *GRPCProvider) GetProviderSchema(ctx context.Context) (resp providers.Ge
 		resp.ServerCapabilities.GetProviderSchemaOptional = protoResp.ServerCapabilities.GetProviderSchemaOptional
 	}

-	// Set the global provider cache so that future calls to this provider can use the cached value.
-	// Crucially, this doesn't look at GetProviderSchemaOptional, because the layers above could use this cache
-	// *without* creating an instance of this provider. And if there is no instance,
-	// then we don't need to set up anything (cause there is nothing to set up), so we need no call
-	// to the providers GetSchema rpc.
-	if !p.Addr.IsZero() {
-		providers.SchemaCache.Set(p.Addr, resp)
-	}
-
-	// Always store this here in the client for providers that are not able to use GetProviderSchemaOptional.
-	// Crucially, this indicates that we've made at least one call to GetProviderSchema to this instance of the provider,
-	// which means in the future we'll be able to return using this cache
-	// (because the possible setup contained in the GetProviderSchema call has happened).
-	// If GetProviderSchemaOptional is true then this cache won't actually ever be used, because the calls to this method
-	// will be satisfied by the global provider cache.
-	p.schema = resp
-
 	return resp
 }

--- a/internal/plugin6/grpc_provider.go
+++ b/internal/plugin6/grpc_provider.go
@@ -9,7 +9,6 @@ import (
 	"context"
 	"errors"
 	"fmt"
-	"sync"

 	plugin "github.com/hashicorp/go-plugin"
 	"github.com/opentofu/opentofu/internal/plugin6/validation"
@@ -18,7 +17,6 @@ import (
 	"github.com/zclconf/go-cty/cty/msgpack"
 	"google.golang.org/grpc"

-	"github.com/opentofu/opentofu/internal/addrs"
 	"github.com/opentofu/opentofu/internal/logging"
 	"github.com/opentofu/opentofu/internal/plugin6/convert"
 	"github.com/opentofu/opentofu/internal/providers"
@@ -27,6 +25,16 @@ import (

 var logger = logging.HCLogger()

+// Some providers may generate quite large schemas, and the internal default
+// grpc response size limit is 4MB. 64MB should cover most any use case, and
+// if we get providers nearing that we may want to consider a finer-grained
+// API to fetch individual resource schemas.
+// Note: this option is marked as EXPERIMENTAL in the grpc API. We keep
+// this for compatibility, but recent providers all set the max message
+// size much higher on the server side, which is the supported method for
+// determining payload size.
+const maxRecvSize = 64 << 20
+
 // GRPCProviderPlugin implements plugin.GRPCPlugin for the go-plugin package.
 type GRPCProviderPlugin struct {
 	plugin.Plugin
@@ -75,11 +83,6 @@ type GRPCProvider struct {
 	// used in an end to end test of a provider.
 	TestServer *grpc.Server

-	// Addr uniquely identifies the type of provider.
-	// Normally executed providers will have this set during initialization,
-	// but it may not always be available for alternative execute modes.
-	Addr addrs.Provider
-
 	// Proto client use to make the grpc service calls.
 	client proto6.ProviderClient

@@ -94,53 +97,40 @@ type GRPCProvider struct {
 	// to use as the parent context for gRPC API calls.
 	ctx context.Context

-	mu sync.Mutex
-	// schema stores the schema for this provider. This is used to properly
-	// serialize the requests for schemas.
-	schema providers.GetProviderSchemaResponse
+	// SchemaCache returns the schema for this provider, calling the supplied
+	// function to fetch it when no cached copy exists. It serializes schema
+	// requests and is shared between instances of the provider.
+	SchemaCache      func(func() providers.GetProviderSchemaResponse) providers.GetProviderSchemaResponse
+	hasFetchedSchema bool
 }

 var _ providers.Interface = new(GRPCProvider)

 func (p *GRPCProvider) GetProviderSchema(ctx context.Context) (resp providers.GetProviderSchemaResponse) {
 	logger.Trace("GRPCProvider.v6: GetProviderSchema")
-	p.mu.Lock()
-	defer p.mu.Unlock()

-	// First, we check the global cache.
-	// The cache could contain this schema if an instance of this provider has previously been started.
-	if !p.Addr.IsZero() {
-		// Even if the schema is cached, GetProviderSchemaOptional could be false. This would indicate that once instantiated,
-		// this provider requires the get schema call to be made at least once, as it handles part of the provider's setup.
-		// At this point, we don't know if this is the first call to a provider instance or not, so we don't use the result in that case.
-		if schemaCached, ok := providers.SchemaCache.Get(p.Addr); ok && schemaCached.ServerCapabilities.GetProviderSchemaOptional {
-			logger.Trace("GRPCProvider: GetProviderSchema: serving from global schema cache", "address", p.Addr)
-			return schemaCached
-		}
+	schema := p.SchemaCache(func() providers.GetProviderSchemaResponse {
+		return p.getProviderSchema(ctx)
+	})
+
+	if !p.hasFetchedSchema && !schema.ServerCapabilities.GetProviderSchemaOptional {
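+		// This instance has not requested the schema itself and the provider does not treat that request as optional, so issue it now; the response and error are discarded.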
+		p.client.GetProviderSchema(ctx, new(proto6.GetProviderSchema_Request), grpc.MaxRecvMsgSizeCallOption{MaxRecvMsgSize: maxRecvSize})
+		p.hasFetchedSchema = true
 	}

-	// If the local cache is non-zero, we know this instance has called
-	// GetProviderSchema at least once, so has satisfied the possible requirement of `GetProviderSchemaOptional=false`.
-	// This means that we can return early now using the locally cached schema, without making this call again.
-	if p.schema.Provider.Block != nil {
-		return p.schema
-	}
+	return schema
+}

+func (p *GRPCProvider) getProviderSchema(ctx context.Context) (resp providers.GetProviderSchemaResponse) {
 	resp.ResourceTypes = make(map[string]providers.Schema)
 	resp.DataSources = make(map[string]providers.Schema)
 	resp.EphemeralResources = make(map[string]providers.Schema)
 	resp.Functions = make(map[string]providers.FunctionSpec)

-	// Some providers may generate quite large schemas, and the internal default
-	// grpc response size limit is 4MB. 64MB should cover most any use case, and
-	// if we get providers nearing that we may want to consider a finer-grained
-	// API to fetch individual resource schemas.
-	// Note: this option is marked as EXPERIMENTAL in the grpc API. We keep
-	// this for compatibility, but recent providers all set the max message
-	// size much higher on the server side, which is the supported method for
-	// determining payload size.
-	const maxRecvSize = 64 << 20
 	protoResp, err := p.client.GetProviderSchema(ctx, new(proto6.GetProviderSchema_Request), grpc.MaxRecvMsgSizeCallOption{MaxRecvMsgSize: maxRecvSize})
+	p.hasFetchedSchema = true
+
 	if err != nil {
 		resp.Diagnostics = resp.Diagnostics.Append(grpcErr(err))
 		return resp
@@ -188,23 +178,6 @@ func (p *GRPCProvider) GetProviderSchema(ctx context.Context) (resp providers.Ge
 		resp.ServerCapabilities.GetProviderSchemaOptional = protoResp.ServerCapabilities.GetProviderSchemaOptional
 	}

-	// Set the global provider cache so that future calls to this provider can use the cached value.
-	// Crucially, this doesn't look at GetProviderSchemaOptional, because the layers above could use this cache
-	// *without* creating an instance of this provider. And if there is no instance,
-	// then we don't need to set up anything (cause there is nothing to set up), so we need no call
-	// to the providers GetSchema rpc.
-	if !p.Addr.IsZero() {
-		providers.SchemaCache.Set(p.Addr, resp)
-	}
-
-	// Always store this here in the client for providers that are not able to use GetProviderSchemaOptional.
-	// Crucially, this indicates that we've made at least one call to GetProviderSchema to this instance of the provider,
-	// which means in the future we'll be able to return using this cache
-	// (because the possible setup contained in the GetProviderSchema call has happened).
-	// If GetProviderSchemaOptional is true then this cache won't actually ever be used, because the calls to this method
-	// will be satisfied by the global provider cache.
-	p.schema = resp
-
 	return resp
 }

--- a/internal/providers/mock_schema_cache.go
+++ b/internal/providers/mock_schema_cache.go
@@ -1,9 +0,0 @@
-package providers
-
-import "github.com/opentofu/opentofu/internal/addrs"
-
-func NewMockSchemaCache() *schemaCache {
-	return &schemaCache{
-		m: make(map[addrs.Provider]ProviderSchema),
-	}
-}
--- a/internal/providers/schema_cache.go
+++ b/internal/providers/schema_cache.go
@@ -1,49 +0,0 @@
-// Copyright (c) The OpenTofu Authors
-// SPDX-License-Identifier: MPL-2.0
-// Copyright (c) 2023 HashiCorp, Inc.
-// SPDX-License-Identifier: MPL-2.0
-
-package providers
-
-import (
-	"sync"
-
-	"github.com/opentofu/opentofu/internal/addrs"
-)
-
-// SchemaCache is a global cache of Schemas.
-// This will be accessed by both core and the provider clients to ensure that
-// large schemas are stored in a single location.
-var SchemaCache = &schemaCache{
-	m: make(map[addrs.Provider]ProviderSchema),
-}
-
-// Global cache for provider schemas
-// Cache the entire response to ensure we capture any new fields, like
-// ServerCapabilities. This also serves to capture errors so that multiple
-// concurrent calls resulting in an error can be handled in the same manner.
-type schemaCache struct {
-	mu sync.Mutex
-	m  map[addrs.Provider]ProviderSchema
-}
-
-func (c *schemaCache) Set(p addrs.Provider, s ProviderSchema) {
-	c.mu.Lock()
-	defer c.mu.Unlock()
-
-	c.m[p] = s
-}
-
-func (c *schemaCache) Get(p addrs.Provider) (ProviderSchema, bool) {
-	c.mu.Lock()
-	defer c.mu.Unlock()
-
-	s, ok := c.m[p]
-	return s, ok
-}
-
-func (c *schemaCache) Remove(p addrs.Provider) {
-	c.mu.Lock()
-	defer c.mu.Unlock()
-	delete(c.m, p)
-}