Files
opentf/internal/tofu/transform_reference.go
Martin Atkins 971513049d tofu: GraphNodeDestroyer can now have references
Previously the ReferenceTransformer just had a broad rule that any node
which implements GraphNodeDestroyer has its expression references
completely ignored.

Now that we're starting to allow dynamic expressions for destroy-related
settings like "prevent_destroy", we need to be able to represent the
dependencies implied by those expressions.

However, the assumption that configuration is mostly ignored when planning
destroy is load-bearing for minimizing awkward dependency problems in the
destroy-planning graph, so this introduces a new concept of "destroy
references" which means that we can implement only a small, curated subset
of references -- for now, just the ones from prevent_destroy -- that get
considered for any node type that implements GraphNodeDestroyer.

Having GraphNodeDestroyer effectively take priority over
GraphNodeReferencer seems like the least disruptive way to retrofit this
idea surgically as a small change to the previous unilateral rule against
any references at all, because in practice all of the destroy nodes embed
NodeAbstractResourceInstance and therefore implement GraphNodeReferencer,
so it is important that we continue to ignore that type's
GraphNodeReferencer implementation whenever it's embedded in something
that is also a GraphNodeDestroyer.

Signed-off-by: Martin Atkins <mart@degeneration.co.uk>
2025-11-18 06:59:35 -08:00

602 lines
19 KiB
Go

// Copyright (c) The OpenTofu Authors
// SPDX-License-Identifier: MPL-2.0
// Copyright (c) 2023 HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package tofu
import (
"context"
"fmt"
"log"
"sort"
"github.com/hashicorp/hcl/v2"
"github.com/opentofu/opentofu/internal/addrs"
"github.com/opentofu/opentofu/internal/configs/configschema"
"github.com/opentofu/opentofu/internal/dag"
"github.com/opentofu/opentofu/internal/lang"
)
// GraphNodeReferenceable must be implemented by any node that represents
// a OpenTofu thing that can be referenced (resource, module, etc.).
//
// Even if the thing has no name, this should return an empty list. By
// implementing this and returning a non-nil result, you say that this CAN
// be referenced and other methods of referencing may still be possible (such
// as by path!)
type GraphNodeReferenceable interface {
GraphNodeModulePath
// ReferenceableAddrs returns a list of addresses through which this can be
// referenced.
ReferenceableAddrs() []addrs.Referenceable
}
// GraphNodeReferencer must be implemented by nodes that reference other
// OpenTofu items and therefore depend on them.
type GraphNodeReferencer interface {
GraphNodeModulePath
// References returns a list of references made by this node, which
// include both a referenced address and source location information for
// the reference.
References() []*addrs.Reference
}
// GraphNodeRootReferencer is implemented by nodes that reference the root
// module, for example module imports
type GraphNodeRootReferencer interface {
GraphNodeReferencer
RootReferences() []*addrs.Reference
}
type GraphNodeAttachDependencies interface {
GraphNodeConfigResource
AttachDependencies([]addrs.ConfigResource)
}
// graphNodeDependsOn is implemented by resources that need to expose any
// references set via DependsOn in their configuration.
type graphNodeDependsOn interface {
GraphNodeReferencer
DependsOn() []*addrs.Reference
}
// graphNodeAttachResourceDependsOn records all resources that are transitively
// referenced through depends_on in the configuration. This is used by data
// resources and ephemeral resources to determine if they can be processed during the plan,
// or if they need to be further delayed until apply.
// We can only use an addrs.ConfigResource address here, because modules are
// not yet expended in the graph. While this will cause some extra data
// resources to show in the plan when their depends_on references may be in
// unrelated module instances, the fact that it only happens when there are any
// resource updates pending means we can still avoid the problem of the
// "perpetual diff"
type graphNodeAttachResourceDependsOn interface {
GraphNodeConfigResource
graphNodeDependsOn
// AttachResourceDependsOn stores the discovered dependencies in the
// resource node for evaluation later.
//
// The force parameter indicates that even if there are no dependencies,
// force the data source to act as though there are for refresh purposes.
// This is needed because yet-to-be-created resources won't be in the
// initial refresh graph, but may still be referenced through depends_on.
AttachResourceDependsOn(deps []addrs.ConfigResource, force bool)
}
// GraphNodeReferenceOutside is an interface that can optionally be implemented.
// A node that implements it can specify that its own referenceable addresses
// and/or the addresses it references are in a different module than the
// node itself.
//
// Any referenceable addresses returned by ReferenceableAddrs are interpreted
// relative to the returned selfPath.
//
// Any references returned by References are interpreted relative to the
// returned referencePath.
//
// It is valid but not required for either of these paths to match what is
// returned by method Path, though if both match the main Path then there
// is no reason to implement this method.
//
// The primary use-case for this is the nodes representing module input
// variables, since their expressions are resolved in terms of their calling
// module, but they are still referenced from their own module.
type GraphNodeReferenceOutside interface {
// ReferenceOutside returns a path in which any references from this node
// are resolved.
ReferenceOutside() (selfPath, referencePath addrs.Module)
}
// ReferenceTransformer is a GraphTransformer that connects all the
// nodes that reference each other in order to form the proper ordering.
type ReferenceTransformer struct{}
func (t *ReferenceTransformer) Transform(_ context.Context, g *Graph) error {
// Build a reference map so we can efficiently look up the references
vs := g.Vertices()
m := NewReferenceMap(vs)
// Find the things that reference things and connect them
for _, v := range vs {
parents := m.References(v)
parentsDbg := make([]string, len(parents))
for i, v := range parents {
parentsDbg[i] = dag.VertexName(v)
}
log.Printf(
"[DEBUG] ReferenceTransformer: %q references: %v",
dag.VertexName(v), parentsDbg)
for _, parent := range parents {
// A destroy plan relies solely on the state, so we only need to
// ensure that temporary values are connected to get the evaluation
// order correct. Any references to destroy nodes will cause
// cycles, because they are connected in reverse order.
if _, ok := parent.(GraphNodeDestroyer); ok {
continue
}
if !graphNodesAreResourceInstancesInDifferentInstancesOfSameModule(v, parent) {
g.Connect(dag.BasicEdge(v, parent))
} else {
log.Printf("[TRACE] ReferenceTransformer: skipping %s => %s inter-module-instance dependency", dag.VertexName(v), dag.VertexName(parent))
}
}
if len(parents) > 0 {
continue
}
}
return nil
}
type depMap map[string]addrs.ConfigResource
// add stores the vertex if it represents a resource in the
// graph.
func (m depMap) add(v dag.Vertex) {
// we're only concerned with resources which may have changes that
// need to be applied.
switch v := v.(type) {
case GraphNodeResourceInstance:
instAddr := v.ResourceInstanceAddr()
addr := instAddr.ContainingResource().Config()
m[addr.String()] = addr
case GraphNodeConfigResource:
addr := v.ResourceAddr()
m[addr.String()] = addr
}
}
// attachResourceDependsOnTransformer records all resources transitively
// referenced through a configuration depends_on.
// This transformer is applicable strictly for data sources and ephemeral resources.
type attachResourceDependsOnTransformer struct {
}
func (t attachResourceDependsOnTransformer) Transform(_ context.Context, g *Graph) error {
// First we need to make a map of referenceable addresses to their vertices.
// This is very similar to what's done in ReferenceTransformer, but we keep
// implementation separate as they may need to change independently.
vertices := g.Vertices()
refMap := NewReferenceMap(vertices)
for _, v := range vertices {
depender, ok := v.(graphNodeAttachResourceDependsOn)
if !ok {
continue
}
// Only data and ephemeral need to attach depends_on, so they can determine if they
// are eligible to be read during plan.
if depender.ResourceAddr().Resource.Mode != addrs.DataResourceMode && depender.ResourceAddr().Resource.Mode != addrs.EphemeralResourceMode {
continue
}
// depMap will only add resource references then dedupe
deps := make(depMap)
dependsOnDeps, fromModule := refMap.dependsOn(g, depender)
for _, dep := range dependsOnDeps {
// any the dependency
deps.add(dep)
}
res := make([]addrs.ConfigResource, 0, len(deps))
for _, d := range deps {
res = append(res, d)
}
log.Printf("[TRACE] attachResourceDependsOnTransformer: %s depends on %s", depender.ResourceAddr(), res)
depender.AttachResourceDependsOn(res, fromModule)
}
return nil
}
// AttachDependenciesTransformer records all resource dependencies for each
// instance, and attaches the addresses to the node itself. Managed resource
// will record these in the state for proper ordering of destroy operations.
type AttachDependenciesTransformer struct {
}
func (t AttachDependenciesTransformer) Transform(_ context.Context, g *Graph) error {
for _, v := range g.Vertices() {
attacher, ok := v.(GraphNodeAttachDependencies)
if !ok {
continue
}
selfAddr := attacher.ResourceAddr()
ans, err := g.Ancestors(v)
if err != nil {
return err
}
// dedupe addrs when there's multiple instances involved, or
// multiple paths in the un-reduced graph
depMap := map[string]addrs.ConfigResource{}
for _, d := range ans {
var addr addrs.ConfigResource
switch d := d.(type) {
case GraphNodeResourceInstance:
instAddr := d.ResourceInstanceAddr()
addr = instAddr.ContainingResource().Config()
case GraphNodeConfigResource:
addr = d.ResourceAddr()
default:
continue
}
if addr.Equal(selfAddr) {
continue
}
depMap[addr.String()] = addr
}
deps := make([]addrs.ConfigResource, 0, len(depMap))
for _, d := range depMap {
deps = append(deps, d)
}
sort.Slice(deps, func(i, j int) bool {
return deps[i].String() < deps[j].String()
})
log.Printf("[TRACE] AttachDependenciesTransformer: %s depends on %s", attacher.ResourceAddr(), deps)
attacher.AttachDependencies(deps)
}
return nil
}
func isDependableResource(v dag.Vertex) bool {
switch v.(type) {
case GraphNodeResourceInstance:
return true
case GraphNodeConfigResource:
return true
}
return false
}
// ReferenceMap is a structure that can be used to efficiently check
// for references on a graph, mapping internal reference keys (as produced by
// the mapKey method) to one or more vertices that are identified by each key.
type ReferenceMap map[string][]dag.Vertex
// References returns the set of vertices that the given vertex refers to,
// and any referenced addresses that do not have corresponding vertices.
func (m ReferenceMap) References(v dag.Vertex) []dag.Vertex {
var refsFunc func() []*addrs.Reference
if rn, ok := v.(GraphNodeDestroyer); ok {
// For nodes that represent destroying something we use a separate
// method because destroying typically uses only a subset of the
// references that would be relevant for other operations: destroying
// is _mostly_ a state-only operation that only use the configuration
// opportunistically for some metadata that influences how the destroy
// action is planned.
refsFunc = rn.DestroyReferences
} else if rn, ok := v.(GraphNodeReferencer); ok {
refsFunc = rn.References
} else {
return nil
}
var matches []dag.Vertex
if rrn, ok := v.(GraphNodeRootReferencer); ok {
for _, ref := range rrn.RootReferences() {
matches = append(matches, m.addReference(addrs.RootModule, v, ref)...)
}
}
for _, ref := range refsFunc() {
matches = append(matches, m.addReference(vertexReferencePath(v), v, ref)...)
}
return matches
}
// addReference returns the set of vertices that the given reference requires
// within a given module. It additionally excludes the current vertex.
func (m ReferenceMap) addReference(path addrs.Module, current dag.Vertex, ref *addrs.Reference) []dag.Vertex {
var matches []dag.Vertex
subject := ref.Subject
key := m.mapKey(path, subject)
if _, exists := m[key]; !exists {
// If what we were looking for was a ResourceInstance then we
// might be in a resource-oriented graph rather than an
// instance-oriented graph, and so we'll see if we have the
// resource itself instead.
switch ri := subject.(type) {
case addrs.ResourceInstance:
subject = ri.ContainingResource()
case addrs.ResourceInstancePhase:
subject = ri.ContainingResource()
case addrs.ModuleCallInstanceOutput:
subject = ri.ModuleCallOutput()
case addrs.ModuleCallInstance:
subject = ri.Call
case addrs.ProviderFunction:
return nil
default:
log.Printf("[INFO] ReferenceTransformer: reference not found: %q", subject)
return nil
}
key = m.mapKey(path, subject)
}
vertices := m[key]
for _, rv := range vertices {
// don't include self-references
if rv == current {
continue
}
matches = append(matches, rv)
}
return matches
}
// dependsOn returns the set of vertices that the given vertex refers to from
// the configured depends_on. The bool return value indicates if depends_on was
// found in a parent module configuration.
func (m ReferenceMap) dependsOn(g *Graph, depender graphNodeDependsOn) ([]dag.Vertex, bool) {
var res []dag.Vertex
fromModule := false
refs := depender.DependsOn()
// get any implied dependencies of the depender
refs = append(refs, m.nodeDependencies(depender)...)
// This is where we record that a module has depends_on configured.
if _, ok := depender.(*nodeExpandModule); ok && len(refs) > 0 {
fromModule = true
}
for _, ref := range refs {
subject := ref.Subject
key := m.referenceMapKey(depender, subject)
vertices, ok := m[key]
if !ok {
// the ReferenceMap generates all possible keys, so any warning
// here is probably not useful for this implementation.
continue
}
for _, rv := range vertices {
// don't include self-references
if rv == depender {
continue
}
res = append(res, rv)
// Check any ancestors for transitive dependencies when we're
// not pointed directly at a resource. We can't be much more
// precise here, since in order to maintain our guarantee that
// the depender (data or ephemeral) will wait for explicit dependencies,
// if those dependencies happen to be a module, output, or variable,
// we have to find some upstream managed resource in order to check for
// a planned change.
if _, ok := rv.(GraphNodeConfigResource); !ok {
ans, _ := g.Ancestors(rv)
for _, v := range ans {
if isDependableResource(v) {
res = append(res, v)
}
}
}
}
}
parentDeps, fromParentModule := m.parentModuleDependsOn(g, depender)
res = append(res, parentDeps...)
return res, fromModule || fromParentModule
}
// Return extra depends_on references if "depender" is a data source or an ephemeral resource.
// For the "depender" we implicitly treat references to managed resources as
// depends_on entries. If "depender" references a managed resource, even if
// that reference is resolvable, it stands to reason that the user intends for
// the "depender" to require that resource in some way.
func (m ReferenceMap) nodeDependencies(depender graphNodeDependsOn) []*addrs.Reference {
var refs []*addrs.Reference
n, ok := depender.(GraphNodeConfigResource)
if !ok {
return refs
}
if n.ResourceAddr().Resource.Mode != addrs.DataResourceMode && n.ResourceAddr().Resource.Mode != addrs.EphemeralResourceMode {
return refs
}
for _, r := range depender.References() {
var resAddr addrs.Resource
switch s := r.Subject.(type) {
case addrs.Resource:
resAddr = s
case addrs.ResourceInstance:
resAddr = s.Resource
r.Subject = resAddr
case addrs.ProviderFunction:
continue
}
if resAddr.Mode != addrs.ManagedResourceMode {
// We only want to wait on directly referenced managed resources.
// Data sources and ephemeral resources have no external side effects, so normal
// references to them in the config will suffice for proper
// ordering.
continue
}
refs = append(refs, r)
}
return refs
}
// parentModuleDependsOn returns the state of vertices that a data sources parent
// module references through the module call's depends_on. The bool return
// value indicates if depends_on was found in a parent module configuration.
func (m ReferenceMap) parentModuleDependsOn(g *Graph, depender graphNodeDependsOn) ([]dag.Vertex, bool) {
var res []dag.Vertex
fromModule := false
// Look for containing modules with DependsOn.
// This should be connected directly to the module node, so we only need to
// look one step away.
for _, v := range g.DownEdges(depender) {
// we're only concerned with module expansion nodes here.
mod, ok := v.(*nodeExpandModule)
if !ok {
continue
}
deps, fromParentModule := m.dependsOn(g, mod)
for _, dep := range deps {
// add the dependency
res = append(res, dep)
// and check any transitive resource dependencies for more resources
ans, _ := g.Ancestors(dep)
for _, v := range ans {
if isDependableResource(v) {
res = append(res, v)
}
}
}
fromModule = fromModule || fromParentModule
}
return res, fromModule
}
func (m *ReferenceMap) mapKey(path addrs.Module, addr addrs.Referenceable) string {
return fmt.Sprintf("%s|%s", path.String(), addr.String())
}
// vertexReferenceablePath returns the path in which the given vertex can be
// referenced. This is the path that its results from ReferenceableAddrs
// are considered to be relative to.
//
// Only GraphNodeModulePath implementations can be referenced, so this method will
// panic if the given vertex does not implement that interface.
func vertexReferenceablePath(v dag.Vertex) addrs.Module {
sp, ok := v.(GraphNodeModulePath)
if !ok {
// Only nodes with paths can participate in a reference map.
panic(fmt.Errorf("vertexMapKey on vertex type %T which doesn't implement GraphNodeModulePath", sp))
}
if outside, ok := v.(GraphNodeReferenceOutside); ok {
// Vertex is referenced from a different module than where it was
// declared.
path, _ := outside.ReferenceOutside()
return path
}
// Vertex is referenced from the same module as where it was declared.
return sp.ModulePath()
}
// vertexReferencePath returns the path in which references _from_ the given
// vertex must be interpreted.
//
// Only GraphNodeModulePath implementations can have references, so this method
// will panic if the given vertex does not implement that interface.
func vertexReferencePath(v dag.Vertex) addrs.Module {
sp, ok := v.(GraphNodeModulePath)
if !ok {
// Only nodes with paths can participate in a reference map.
panic(fmt.Errorf("vertexReferencePath on vertex type %T which doesn't implement GraphNodeModulePath", v))
}
if outside, ok := v.(GraphNodeReferenceOutside); ok {
// Vertex makes references to objects in a different module than where
// it was declared.
_, path := outside.ReferenceOutside()
return path
}
// Vertex makes references to objects in the same module as where it
// was declared.
return sp.ModulePath()
}
// referenceMapKey produces keys for the "edges" map. "referrer" is the vertex
// that the reference is from, and "addr" is the address of the object being
// referenced.
//
// The result is an opaque string that includes both the address of the given
// object and the address of the module instance that object belongs to.
//
// Only GraphNodeModulePath implementations can be referrers, so this method will
// panic if the given vertex does not implement that interface.
func (m *ReferenceMap) referenceMapKey(referrer dag.Vertex, addr addrs.Referenceable) string {
path := vertexReferencePath(referrer)
return m.mapKey(path, addr)
}
// NewReferenceMap is used to create a new reference map for the
// given set of vertices.
func NewReferenceMap(vs []dag.Vertex) ReferenceMap {
// Build the lookup table
m := make(ReferenceMap)
for _, v := range vs {
// We're only looking for referenceable nodes
rn, ok := v.(GraphNodeReferenceable)
if !ok {
continue
}
path := vertexReferenceablePath(v)
// Go through and cache them
for _, addr := range rn.ReferenceableAddrs() {
key := m.mapKey(path, addr)
m[key] = append(m[key], v)
}
}
return m
}
// ReferencesFromConfig returns the references that a configuration has
// based on the interpolated variables in a configuration.
func ReferencesFromConfig(body hcl.Body, schema *configschema.Block) []*addrs.Reference {
if body == nil {
return nil
}
refs, _ := lang.ReferencesInBlock(addrs.ParseRef, body, schema)
return refs
}