engine/applying: Track grapheval/workgraph requests

There is currently a bug in the apply engine that's causing a self-dependency error during promise resolution, which was a good reminder that we'd not previously finished connecting all of the parts to be able to unwind such problems into a useful error message. Due to how the work is split between the apply engine, the evaluator, and the execgraph package it takes some awkward back-and-forth to get all of the needed information together into one place. This compromise aims to do as little work as possible in the happy path and defer more expensive analysis until we actually know we're going to report an error message. In this case we can't really avoid proactively collecting the request IDs because we don't know ahead of time what (if anything) will be involved in a promise error, but only when actually generating an error message will we dig into the original source execution graph to find out what each of the affected requests was actually trying to do and construct a human-friendly summary of each one. This was a bit of a side-quest relative to the current goal of just getting things basically working with a simple configuration, but this is useful in figuring out what's going on with the current bug (which will be fixed in a future commit) and will probably be useful when dealing with future bugs too. Signed-off-by: Martin Atkins <mart@degeneration.co.uk>
2026-05-23 22:00:54 -04:00 · 2026-01-20 14:50:06 -08:00
parent 2cd05d7662
commit 431697c812
7 changed files with 235 additions and 83 deletions
--- a/internal/engine/applying/apply.go
+++ b/internal/engine/applying/apply.go
@@ -14,6 +14,7 @@ import (
 	"github.com/opentofu/opentofu/internal/engine/internal/execgraph"
 	"github.com/opentofu/opentofu/internal/engine/plugins"
 	"github.com/opentofu/opentofu/internal/lang/eval"
+	"github.com/opentofu/opentofu/internal/lang/grapheval"
 	"github.com/opentofu/opentofu/internal/plans"
 	"github.com/opentofu/opentofu/internal/states"
 	"github.com/opentofu/opentofu/internal/tfdiags"
@@ -41,13 +42,16 @@ func ApplyPlannedChanges(ctx context.Context, plan *plans.Plan, configInst *eval
 		return nil, diags
 	}

-	execGraph, execOps, moreDiags := compileExecutionGraph(ctx, plan, oracle, plugins)
+	execGraphSrc, execGraph, execOps, moreDiags := compileExecutionGraph(ctx, plan, oracle, plugins)
 	diags = diags.Append(moreDiags)
 	if moreDiags.HasErrors() {
 		return nil, diags
 	}
 	glue.graph = execGraph

+	reqTracker := newRequestTracker(execGraphSrc, execOps)
+	ctx = grapheval.ContextWithRequestTracker(ctx, reqTracker)
+
 	moreDiags = execGraph.Execute(ctx)
 	diags = diags.Append(moreDiags)

--- a/internal/engine/applying/operations.go
+++ b/internal/engine/applying/operations.go
@@ -63,7 +63,7 @@ type execOperations struct {

 var _ exec.Operations = (*execOperations)(nil)

-func compileExecutionGraph(ctx context.Context, plan *plans.Plan, oracle *eval.ApplyOracle, plugins plugins.Plugins) (*execgraph.CompiledGraph, *execOperations, tfdiags.Diagnostics) {
+func compileExecutionGraph(ctx context.Context, plan *plans.Plan, oracle *eval.ApplyOracle, plugins plugins.Plugins) (*execgraph.Graph, *execgraph.CompiledGraph, *execOperations, tfdiags.Diagnostics) {
 	var diags tfdiags.Diagnostics

 	execGraph, err := execgraph.UnmarshalGraph(plan.ExecutionGraph)
@@ -73,7 +73,7 @@ func compileExecutionGraph(ctx context.Context, plan *plans.Plan, oracle *eval.A
 			"Invalid execution graph in plan",
 			fmt.Sprintf("Failed to unmarshal the execution graph: %s.", tfdiags.FormatError(err)),
 		))
-		return nil, nil, diags
+		return nil, nil, nil, diags
 	}
 	if logging.IsDebugOrHigher() {
 		// FIXME: The DebugRepr currently includes ctydebug representations of
@@ -90,7 +90,7 @@ func compileExecutionGraph(ctx context.Context, plan *plans.Plan, oracle *eval.A
 	compiledGraph, moreDiags := execGraph.Compile(ops)
 	diags = diags.Append(moreDiags)
 	if moreDiags.HasErrors() {
-		return nil, nil, diags
+		return nil, nil, nil, diags
 	}

 	ops.priorState = plan.PriorState.SyncWrapper()
@@ -98,7 +98,7 @@ func compileExecutionGraph(ctx context.Context, plan *plans.Plan, oracle *eval.A
 	ops.configOracle = oracle
 	ops.plugins = plugins

-	return compiledGraph, ops, diags
+	return execGraph, compiledGraph, ops, diags
 }

 // Finish closes the operations object and returns its final state snapshot.
--- a/internal/engine/applying/request_tracker.go
+++ b/internal/engine/applying/request_tracker.go
@@ -0,0 +1,88 @@
+// Copyright (c) The OpenTofu Authors
+// SPDX-License-Identifier: MPL-2.0
+// Copyright (c) 2023 HashiCorp, Inc.
+// SPDX-License-Identifier: MPL-2.0
+
+package applying
+
+import (
+	"context"
+	"iter"
+	"maps"
+	"sync"
+
+	"github.com/apparentlymart/go-workgraph/workgraph"
+	"github.com/opentofu/opentofu/internal/engine/internal/execgraph"
+	"github.com/opentofu/opentofu/internal/lang/eval"
+	"github.com/opentofu/opentofu/internal/lang/grapheval"
+)
+
+// execRequestTracker is our implementation of [grapheval.RequestTracker] for
+// use when processing an execution graph.
+//
+// An instance of this should be associated with the context passed to the
+// graph execution function so that we can use it to produce helpful error
+// messages if promise-related errors occur during execution.
+type execRequestTracker struct {
+	configOracle *eval.ApplyOracle
+	graph        *execgraph.Graph
+
+	promiseReqsMu sync.Mutex
+	promiseReqs   map[execgraph.PromiseDrivenResultKey]workgraph.RequestID
+}
+
+var _ grapheval.RequestTracker = (*execRequestTracker)(nil)
+var _ execgraph.RequestTrackerWithNotify = (*execRequestTracker)(nil)
+
+func newRequestTracker(graph *execgraph.Graph, ops *execOperations) *execRequestTracker {
+	return &execRequestTracker{
+		configOracle: ops.configOracle,
+		graph:        graph,
+		promiseReqs:  make(map[execgraph.PromiseDrivenResultKey]workgraph.RequestID),
+	}
+}
+
+// ActiveRequests implements [grapheval.RequestTracker].
+func (e *execRequestTracker) ActiveRequests() iter.Seq2[workgraph.RequestID, grapheval.RequestInfo] {
+	return func(yield func(workgraph.RequestID, grapheval.RequestInfo) bool) {
+		// To make the downstream implementations of this simpler we
+		// always visit all of the requests known to the evaluator and just
+		// discard them if the caller has stopped consuming our sequence.
+		// In practice we should always read the whole sequence to completion
+		// in the error reporting path anyway, so this is really just to
+		// honor the general expectations of [iter.Seq2].
+		keepGoing := true
+		e.configOracle.AnnounceAllGraphevalRequests(func(reqID workgraph.RequestID, info grapheval.RequestInfo) {
+			if !keepGoing {
+				return
+			}
+			keepGoing = yield(reqID, info)
+		})
+		if !keepGoing {
+			return
+		}
+		// We'll also report any requests that execgraph has announced to us
+		// using [execRequestTracker.TrackExecutionGraphRequest].
+		//
+		// We copy the map of requests here just so we don't hold this lock
+		// across yields to the caller. We should only end up in here when
+		// we're handling an error so it's okay to spend this extra time.
+		e.promiseReqsMu.Lock()
+		promiseReqs := make(map[execgraph.PromiseDrivenResultKey]workgraph.RequestID, len(e.promiseReqs))
+		maps.Copy(promiseReqs, e.promiseReqs)
+		e.promiseReqsMu.Unlock()
+		for key, reqID := range promiseReqs {
+			info := e.graph.PromiseDrivenRequestInfo(key)
+			if !yield(reqID, info) {
+				return
+			}
+		}
+	}
+}
+
+// TrackExecutionGraphRequest implements [execgraph.RequestTrackerWithNotify].
+func (e *execRequestTracker) TrackExecutionGraphRequest(ctx context.Context, key execgraph.PromiseDrivenResultKey, reqID workgraph.RequestID) {
+	e.promiseReqsMu.Lock()
+	e.promiseReqs[key] = reqID
+	e.promiseReqsMu.Unlock()
+}
--- a/internal/engine/internal/execgraph/compiled.go
+++ b/internal/engine/internal/execgraph/compiled.go
@@ -14,6 +14,7 @@ import (
 	"github.com/zclconf/go-cty/cty"

 	"github.com/opentofu/opentofu/internal/addrs"
+	"github.com/opentofu/opentofu/internal/lang/grapheval"
 	"github.com/opentofu/opentofu/internal/tfdiags"
 )

@@ -117,3 +118,59 @@ func (c *CompiledGraph) ResourceInstanceValue(ctx context.Context, addr addrs.Ab
 	}
 	return getter(ctx)
 }
+
+// trackWorkgraphRequest attempts to notify an upstream about a workgraph
+// request we've started as part of producing the result for the given
+// reference.
+//
+// This is here to help an execgraph caller -- typically the applying engine --
+// to produce a more helpful error message if any promise-related errors occur
+// during execution graph processing. Calls to this are not used when execution
+// is successful, but unfortunately we nonetheless need to send this information
+// anyway just in case a related error occurs downstream somewhere.
+//
+// In order to benefit from this the execgraph caller must use []
+func trackWorkgraphRequest(ctx context.Context, operationIdx int, reqID workgraph.RequestID) {
+	reqTracker := grapheval.RequestTrackerFromContext(ctx)
+	if reqTracker == nil {
+		return
+	}
+	// Unfortunately this relies on the "optional extension interface" pattern,
+	// so we have no compile-time checking that the applying engine implements
+	// this correctly. This is a pragmatic concession since we should only
+	// need this information in "should never happen" error cases in order to
+	// produce more helpful diagnostic information, since there should not be
+	// any promise-related errors when handling a correctly-constructed
+	// execution graph.
+	withNotify, ok := reqTracker.(RequestTrackerWithNotify)
+	if !ok {
+		return
+	}
+	key := PromiseDrivenResultKey{operationIdx: operationIdx}
+	withNotify.TrackExecutionGraphRequest(ctx, key, reqID)
+}
+
+// RequestTrackerWithNotify is an optional extension of [grapheval.RequestTracker]
+// which must be implemented by the request tracker passed in
+// [CompiledGraph.Execute]'s [context.Context] argument if the request tracker
+// needs to be aware of workgraph requests made as part of resolving results
+// during execution graph processing.
+//
+// If the given context has no request tracker at all, or if the request tracker
+// does not implement this extension interface, then no notifications will be
+// delivered but execution is otherwise unaffected.
+type RequestTrackerWithNotify interface {
+	grapheval.RequestTracker
+	TrackExecutionGraphRequest(ctx context.Context, key PromiseDrivenResultKey, reqID workgraph.RequestID)
+}
+
+// PromiseDrivenResultKey is an opaque representation of a [ResultRef] whose
+// result is produced using [grapheval] and [workgraph] mechanisms, which
+// we use in [RequsetTrackerWithNotify] to give a caller just enough information
+// to selectively request more detail from a source graph only if necessary
+// to report an error.
+type PromiseDrivenResultKey struct {
+	// for now operations are the only kind of result we track in this way,
+	// and so this just directly tracks the operation index.
+	operationIdx int
+}
--- a/internal/engine/internal/execgraph/compiler.go
+++ b/internal/engine/internal/execgraph/compiler.go
@@ -9,7 +9,6 @@ import (
 	"context"
 	"fmt"
 	"iter"
-	"strings"

 	"github.com/apparentlymart/go-workgraph/workgraph"
 	"github.com/zclconf/go-cty/cty"
@@ -148,6 +147,7 @@ func (c *compiler) Compile() (*CompiledGraph, tfdiags.Diagnostics) {
 			resolver := opResolvers[opIdx]
 			worker := workgraph.NewWorker(resolver)
 			ctx := grapheval.ContextWithWorker(parentCtx, worker)
+			trackWorkgraphRequest(ctx, opIdx, resolver.RequestID())
 			ret, ok, diags := mainExec(ctx)
 			// Resolving the promise might allow dependent operations to begin.
 			resolver.ReportSuccess(worker, nodeResultRaw{
@@ -234,7 +234,6 @@ func (c *compiler) compileResultRef(ref AnyResultRef) nodeExecuteRaw {
 		// that the downstream operations that rely on these results will
 		// type-assert them dynamically as needed.
 		opResults := c.opResults
-		opResolvers := c.opResolvers
 		index := ref.operationResultIndex()
 		return func(ctx context.Context) (any, bool, tfdiags.Diagnostics) {
 			var diags tfdiags.Diagnostics
@@ -246,7 +245,7 @@ func (c *compiler) compileResultRef(ref AnyResultRef) nodeExecuteRaw {
 				// during the apply phase is always a bug in OpenTofu because
 				// we should've detected any user-caused problems during the
 				// planning phase.
-				diags = diags.Append(diagsForWorkgraphError(ctx, err, opResolvers))
+				diags = diags.Append(grapheval.DiagnosticsForWorkgraphError(ctx, err))
 				return nil, false, diags
 			}
 			return resultRaw.Value, resultRaw.CanContinue, resultRaw.Diagnostics
@@ -311,78 +310,3 @@ type nodeResultRaw struct {
 	CanContinue bool
 	Diagnostics tfdiags.Diagnostics
 }
-
-func diagsForWorkgraphError(ctx context.Context, err error, operationResolvers []workgraph.Resolver[nodeResultRaw]) tfdiags.Diagnostics {
-	// findRequestName makes a best effort to describe the given workgraph request
-	// in terms of operations in the execution graph, though because all of
-	// these are "should never happen" cases this focuses mainly on providing
-	// information to help OpenTofu developers with debugging, rather than
-	// end-user-friendly information. (Any user-caused problems ought to have
-	// been detected during the planning phase, so any problem we encounter
-	// during apply is always an OpenTofu bug.)
-	//
-	// As usual we tolerate this being a pretty inefficient linear search
-	// over all of the requests we know about because we should only end up
-	// here when something has gone very wrong, and this approach avoids
-	// tracking a bunch of extra debug state in the happy path.
-	findRequestName := func(reqId workgraph.RequestID) string {
-		for opIdx, resolver := range operationResolvers {
-			if resolver.RequestID() == reqId {
-				return fmt.Sprintf("execution graph operation r[%d]", opIdx)
-			}
-		}
-		// If we fall out here then we presumably have a request ID from some
-		// other part of the system, such as from package configgraph. We
-		// might be able to get a useful description from a request tracker
-		// attached to the given context, if so.
-		// Note that we shouldn't really get here if the execution graph was
-		// constructed correctly because the "waiter" nodes used by anything
-		// that refers to the evaluator's oracle should block us from trying
-		// to retrieve something that isn't ready yet, but we'll attempt this
-		// anyway because if we get here then there's a bug somewhere by
-		// definition.
-		if reqTracker := grapheval.RequestTrackerFromContext(ctx); reqTracker != nil {
-			for candidate, info := range reqTracker.ActiveRequests() {
-				if candidate == reqId {
-					return info.Name
-				}
-			}
-		}
-		// If all of that failed then we'll just return a useless placeholder
-		// and hope that something else in the error message or debug log
-		// gives some clue as to what's going on.
-		return "<unknown>"
-	}
-
-	var diags tfdiags.Diagnostics
-	const summary = "Apply-time execution error"
-	switch err := err.(type) {
-	case workgraph.ErrSelfDependency:
-		var buf strings.Builder
-		buf.WriteString("While performing actions during the apply phase, OpenTofu detected a self-dependency cycle between the following:\n")
-		for _, reqId := range err.RequestIDs {
-			fmt.Fprintf(&buf, "  - %s\n", findRequestName(reqId))
-		}
-		buf.WriteString("\nThis is a bug in OpenTofu.")
-		diags = diags.Append(tfdiags.Sourceless(
-			tfdiags.Error,
-			summary,
-			buf.String(),
-		))
-	case workgraph.ErrUnresolved:
-		diags = diags.Append(tfdiags.Sourceless(
-			tfdiags.Error,
-			summary,
-			fmt.Sprintf("While performing actions during the apply phase, a request for %q was left unresolved. This is a bug in OpenTofu.", findRequestName(err.RequestID)),
-		))
-	default:
-		// We're not expecting any other error types here so we'll just
-		// return something generic.
-		diags = diags.Append(tfdiags.Sourceless(
-			tfdiags.Error,
-			summary,
-			fmt.Sprintf("While performing actions during the apply phase, OpenTofu encountered an unexpected error: %s.\n\nThis is a bug in OpenTofu.", err),
-		))
-	}
-	return diags
-}
--- a/internal/engine/internal/execgraph/graph.go
+++ b/internal/engine/internal/execgraph/graph.go
@@ -15,6 +15,7 @@ import (
 	"github.com/zclconf/go-cty/cty"

 	"github.com/opentofu/opentofu/internal/addrs"
+	"github.com/opentofu/opentofu/internal/lang/grapheval"
 )

 type Graph struct {
@@ -81,6 +82,67 @@ type Graph struct {
 	resourceInstanceResults addrs.Map[addrs.AbsResourceInstance, ResourceInstanceResultRef]
 }

+// PromiseDrivenRequestInfo is part of our somewhat-roundabout means of
+// gathering additional context about promise-based requests only when we
+// enounter an error that makes it worth gathering this information.
+//
+// Given a [PromiseDrivenResultKey] previously advertised to a
+// [RequestTrackerWithNotify] implementation used with [CompiledGraph.Execute]
+// on a compiled version of the same graph, this returns a
+// [grapheval.RequestInfo] value summarizing what that request was intending
+// to achieve in terms that are suitable to include in an error message that
+// someone will presmuably submit in an OpenTofu bug report, because
+// promise-related errors should only crop up during processing of
+// incorrectly-constructed execution graphs.
+func (g *Graph) PromiseDrivenRequestInfo(key PromiseDrivenResultKey) grapheval.RequestInfo {
+	opIdx := key.operationIdx
+	if opIdx < 0 || opIdx >= len(g.ops) {
+		// We seem to have a key produced from a different graph than this one,
+		// but we'll tolerate this and just return a placeholder so that we
+		// can hopefully return at least a partial error message.
+		return grapheval.RequestInfo{
+			Name: "unknown execution graph operation (this is a bug in OpenTofu)",
+		}
+	}
+	opDesc := g.ops[opIdx]
+	return grapheval.RequestInfo{
+		Name: "internal operation: " + g.operationDebugSummary(opDesc),
+	}
+}
+
+func (g *Graph) operationDebugSummary(opDesc operationDesc) string {
+	var buf strings.Builder
+	buf.WriteString(strings.TrimPrefix(opDesc.opCode.String(), "op"))
+	buf.WriteByte('(')
+	for argIdx, arg := range opDesc.operands {
+		if argIdx != 0 {
+			buf.WriteString(", ")
+		}
+		switch arg := arg.(type) {
+		case anyOperationResultRef:
+			// We'll show this as a nested function call so that we can also
+			// see whatever arguments it has, since hopefully this will
+			// eventually lead to something useful like a resource instance addr.
+			childOpIdx := arg.operationResultIndex()
+			opDesc := g.ops[childOpIdx]
+			buf.WriteString(g.operationDebugSummary(opDesc))
+		case valueResultRef:
+			v := g.constantVals[arg.index]
+			fmt.Fprintf(&buf, "<%s value>", v.Type().FriendlyName())
+		case resourceInstAddrResultRef, providerInstAddrResultRef, waiterResultRef, nil:
+			// Our resultDebugRepr representation is fine for these ones
+			buf.WriteString(g.resultDebugRepr(arg))
+		default:
+			// We don't print out anything we're not already familiar with
+			// because we might add a new kind of result later that isn't
+			// appropriate to include in a UI-facing error message.
+			buf.WriteString("[...]")
+		}
+	}
+	buf.WriteByte(')')
+	return buf.String()
+}
+
 // DebugRepr returns a relatively-concise string representation of the
 // graph which includes all of the registered operations and their operands,
 // along with any constant values they rely on.
--- a/internal/lang/eval/config_apply.go
+++ b/internal/lang/eval/config_apply.go
@@ -9,6 +9,7 @@ import (
 	"context"
 	"fmt"

+	"github.com/apparentlymart/go-workgraph/workgraph"
 	"github.com/zclconf/go-cty/cty"

 	"github.com/opentofu/opentofu/internal/addrs"
@@ -187,3 +188,19 @@ func (o *ApplyOracle) ProviderInstanceConfig(ctx context.Context, addr addrs.Abs
 	v, diags := inst.ConfigValue(ctx)
 	return configgraph.PrepareOutgoingValue(v), diags
 }
+
+// AnnounceAllGraphevalRequests calls the given function once for each internal
+// workgraph request that has previously been started by requests to this
+// oracle.
+//
+// This is used by the apply engine as part of its implementation of
+// [grapheval.RequestTracker], so that promise-resolution-related diagnostics
+// can include information about which requests were involved in the problem.
+//
+// This information is collected as a separate step only when needed because
+// that avoids us needing to keep track of this metadata on the happy path,
+// so that we only pay the cost of gathering this data when we're actually
+// going to use it for something.
+func (o *ApplyOracle) AnnounceAllGraphevalRequests(announce func(workgraph.RequestID, grapheval.RequestInfo)) {
+	o.root.AnnounceAllGraphevalRequests(announce)
+}