Alternate approach to linking and locking the global cache (#2708)

Signed-off-by: Christian Mesh <christianmesh1@gmail.com>
Signed-off-by: Martin Atkins <mart@degeneration.co.uk>
Co-authored-by: Martin Atkins <mart@degeneration.co.uk>
This commit is contained in:
Christian Mesh
2025-05-08 15:26:46 -04:00
committed by GitHub
parent 06762cb402
commit 11694a6ac0
10 changed files with 386 additions and 452 deletions

View File

@@ -6,15 +6,10 @@
package providercache
import (
"context"
"fmt"
"log"
"os"
"path/filepath"
"time"
"github.com/opentofu/opentofu/internal/addrs"
"github.com/opentofu/opentofu/internal/flock"
"github.com/opentofu/opentofu/internal/getproviders"
)
@@ -70,95 +65,6 @@ func (d *Dir) BasePath() string {
return filepath.Clean(d.baseDir)
}
func (d *Dir) Lock(ctx context.Context, provider addrs.Provider, version getproviders.Version) (func() error, error) {
providerPath := getproviders.UnpackedDirectoryPathForPackage(d.baseDir, provider, version, d.targetPlatform)
// If the lockfile is put within the target directory, it can mess with hashing
// Instead we add a suffix to the last part of the path (targetplatform) and lock that file instead.
dirPath := filepath.Dir(providerPath)
lockFileName := filepath.Base(providerPath) + ".lock"
lockFile := filepath.Join(dirPath, lockFileName)
log.Printf("[TRACE] Attempting to acquire global provider lock %s", lockFile)
// Ensure the provider directory exists
//nolint: mnd // directory permissions
if err := os.MkdirAll(dirPath, 0755); err != nil {
return nil, err
}
var f *os.File
// Try to create the lock file, wait up to 1 second for transient errors to clear.
for timeout := time.After(time.Second); ; {
var err error
// Try to get a handle to the file (or create if it does not exist)
// They will all end up with the same file handle on any correctly implemented filesystem.
// This is one of the many reasons we recommend users look at the flock support of their
// networked filesystems when using the global provider cache.
// Windows: even though out flock creates an exclusive lock, we are still able to open a handle to this file and wait below for the actual lock to be provided.
// Sometimes the creates can conflict and will need to be tried multiple times (incredibly uncommon).
f, err = os.OpenFile(lockFile, os.O_RDWR|os.O_CREATE, 0644)
if err == nil {
// We don't defer f.Close() here as we explicitly want to handle it below
break
}
select {
case <-timeout:
return nil, err
case <-ctx.Done():
return nil, ctx.Err()
default:
// Chill for a bit before trying again
time.Sleep(50 * time.Millisecond)
}
}
// Wait for the file lock for up to 60s. Might make sense to have the timeout be configurable for different network conditions / package sizes.
for timeout := time.After(time.Second * 60); ; {
// We have a valid file handle, let's try to lock it (nonblocking)
err := flock.Lock(f)
if err == nil {
// Lock succeeded
break
}
select {
case <-timeout:
if f != nil {
f.Close()
}
return nil, fmt.Errorf("Unable to acquire file lock on %q: %w", lockFile, err)
case <-ctx.Done():
if f != nil {
f.Close()
}
return nil, ctx.Err()
default:
// Chill for a bit before trying again
time.Sleep(100 * time.Millisecond)
}
}
log.Printf("[TRACE] Acquired global provider lock %s", lockFile)
return func() error {
log.Printf("[TRACE] Releasing global provider lock %s", lockFile)
unlockErr := flock.Unlock(f)
// Prefer close error over unlock error
err := f.Close()
if err != nil {
return err
}
return unlockErr
}, nil
}
// ProviderVersion returns the cache entry for the requested provider version,
// or nil if the requested provider version isn't present in the cache.
func (d *Dir) ProviderVersion(provider addrs.Provider, version getproviders.Version) *CachedProvider {