mirror of
https://github.com/turbot/steampipe.git
synced 2025-12-19 18:12:43 -05:00
Increase database connection timeout and improve the error message if connection failure occurs. Closes #2377
This commit is contained in:
@@ -4,6 +4,7 @@ import "time"
|
||||
|
||||
var (
|
||||
DashboardServiceStartTimeout = 30 * time.Second
|
||||
DBConnectionTimeout = 5 * time.Second
|
||||
DBConnectionTimeout = 30 * time.Second
|
||||
DBConnectionRetryBackoff = 200 * time.Millisecond
|
||||
ServicePingInterval = 50 * time.Millisecond
|
||||
)
|
||||
|
||||
@@ -2,10 +2,12 @@ package db_common
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"time"
|
||||
|
||||
"github.com/jackc/pgx/v5"
|
||||
"github.com/jackc/pgx/v5/pgxpool"
|
||||
"github.com/sethvargo/go-retry"
|
||||
"github.com/turbot/steampipe/pkg/constants"
|
||||
"github.com/turbot/steampipe/pkg/utils"
|
||||
)
|
||||
@@ -35,27 +37,31 @@ func WaitForPool(ctx context.Context, db *pgxpool.Pool) (err error) {
|
||||
}
|
||||
}
|
||||
|
||||
// WaitForConnection waits for the db to start accepting connections and returns true
|
||||
// returns false if the dbClient does not start within a stipulated time,
|
||||
func WaitForConnection(ctx context.Context, db *pgx.Conn) (err error) {
|
||||
// WaitForConnection PINGs the DB - retrying after a backoff of constants.ServicePingInterval - but only for constants.DBConnectionTimeout
|
||||
// returns the error from the database if the dbClient does not respond successfully after a timeout
|
||||
func WaitForConnection(ctx context.Context, connection *pgx.Conn) (err error) {
|
||||
utils.LogTime("db.waitForConnection start")
|
||||
defer utils.LogTime("db.waitForConnection end")
|
||||
|
||||
pingTimer := time.NewTicker(constants.ServicePingInterval)
|
||||
timeoutAt := time.After(constants.DBConnectionTimeout)
|
||||
defer pingTimer.Stop()
|
||||
timeoutCtx, cancel := context.WithTimeout(ctx, constants.DBConnectionTimeout)
|
||||
defer func() {
|
||||
cancel()
|
||||
}()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-pingTimer.C:
|
||||
err = db.Ping(ctx)
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
case <-timeoutAt:
|
||||
return
|
||||
retryBackoff := retry.WithMaxDuration(
|
||||
constants.DBConnectionTimeout,
|
||||
retry.NewConstant(constants.ServicePingInterval),
|
||||
)
|
||||
|
||||
retryErr := retry.Do(ctx, retryBackoff, func(ctx context.Context) error {
|
||||
log.Println("[TRACE] Pinging")
|
||||
pingErr := connection.Ping(timeoutCtx)
|
||||
if pingErr != nil {
|
||||
log.Println("[TRACE] Pinging failed -> trying again")
|
||||
return retry.RetryableError(pingErr)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
return retryErr
|
||||
}
|
||||
|
||||
@@ -7,6 +7,8 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/jackc/pgx/v5"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sethvargo/go-retry"
|
||||
"github.com/turbot/steampipe/pkg/constants"
|
||||
"github.com/turbot/steampipe/pkg/constants/runtime"
|
||||
"github.com/turbot/steampipe/pkg/db/db_common"
|
||||
@@ -104,3 +106,53 @@ func createLocalDbClient(ctx context.Context, opts *CreateDbOptions) (*pgx.Conn,
|
||||
}
|
||||
return conn, nil
|
||||
}
|
||||
|
||||
// createMaintenanceClient connects to the postgres server using the
|
||||
// maintenance database (postgres) and superuser
|
||||
// this is used in a couple of places
|
||||
// 1. During installation to set up the DBMS with foreign_server, extension et al.
|
||||
// 2. During service start and stop to query the DBMS for parameters (connected clients, database name etc.)
|
||||
//
|
||||
// this is called immediately after the service process is started and hence
|
||||
// all special handling related to service startup failures SHOULD be handled here
|
||||
func createMaintenanceClient(ctx context.Context, port int) (*pgx.Conn, error) {
|
||||
utils.LogTime("db_local.createMaintenanceClient start")
|
||||
defer utils.LogTime("db_local.createMaintenanceClient end")
|
||||
|
||||
var conn *pgx.Conn
|
||||
var err error
|
||||
|
||||
backoff := retry.WithMaxDuration(
|
||||
constants.DBConnectionTimeout,
|
||||
retry.NewConstant(constants.DBConnectionRetryBackoff),
|
||||
)
|
||||
|
||||
// create a connection to the service.
|
||||
// Retry after a backoff, but only up to a maximum duration.
|
||||
err = retry.Do(ctx, backoff, func(rCtx context.Context) error {
|
||||
connStr := fmt.Sprintf("host=localhost port=%d user=%s dbname=postgres sslmode=disable", port, constants.DatabaseSuperUser)
|
||||
log.Println("[TRACE] Trying to create maintenance client with: ", connStr)
|
||||
dbConnection, err := pgx.Connect(rCtx, connStr)
|
||||
if err != nil {
|
||||
log.Println("[TRACE] could not connect:", err)
|
||||
return retry.RetryableError(err)
|
||||
}
|
||||
log.Println("[TRACE] connected to database")
|
||||
conn = dbConnection
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
log.Println("[TRACE] could not connect to service")
|
||||
return nil, errors.Wrap(err, "connection setup failed")
|
||||
}
|
||||
|
||||
// wait for the connection to get established
|
||||
// WaitForConnection retries on its own
|
||||
err = db_common.WaitForConnection(ctx, conn)
|
||||
if err != nil {
|
||||
conn.Close(ctx)
|
||||
log.Println("[TRACE] WaitForConnection timed out")
|
||||
return nil, err
|
||||
}
|
||||
return conn, nil
|
||||
}
|
||||
|
||||
@@ -9,16 +9,13 @@ import (
|
||||
"os"
|
||||
"os/exec"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/fatih/color"
|
||||
"github.com/jackc/pgx/v5"
|
||||
"github.com/sethvargo/go-retry"
|
||||
psutils "github.com/shirou/gopsutil/process"
|
||||
filehelpers "github.com/turbot/go-kit/files"
|
||||
"github.com/turbot/go-kit/helpers"
|
||||
"github.com/turbot/steampipe/pkg/constants"
|
||||
"github.com/turbot/steampipe/pkg/db/db_common"
|
||||
"github.com/turbot/steampipe/pkg/filepaths"
|
||||
"github.com/turbot/steampipe/pkg/ociinstaller"
|
||||
"github.com/turbot/steampipe/pkg/ociinstaller/versionfile"
|
||||
@@ -367,35 +364,6 @@ func resolveDatabaseName(oldDbName *string) string {
|
||||
return databaseName
|
||||
}
|
||||
|
||||
// createMaintenanceClient connects to the postgres server using the
|
||||
// maintenance database and superuser
|
||||
func createMaintenanceClient(ctx context.Context, port int) (*pgx.Conn, error) {
|
||||
backoff := retry.NewConstant(200 * time.Millisecond)
|
||||
var conn *pgx.Conn
|
||||
|
||||
err := retry.Do(ctx, retry.WithMaxRetries(5, backoff), func(ctx context.Context) error {
|
||||
connStr := fmt.Sprintf("host=localhost port=%d user=%s dbname=postgres sslmode=disable", port, constants.DatabaseSuperUser)
|
||||
log.Println("[TRACE] Connection string: ", connStr)
|
||||
utils.LogTime("db_local.createClient connection open start")
|
||||
connection, err := pgx.Connect(context.Background(), connStr)
|
||||
utils.LogTime("db_local.createClient connection open end")
|
||||
if err != nil {
|
||||
return retry.RetryableError(err)
|
||||
}
|
||||
if err := db_common.WaitForConnection(ctx, connection); err != nil {
|
||||
return retry.RetryableError(err)
|
||||
}
|
||||
conn = connection
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return conn, nil
|
||||
}
|
||||
|
||||
func startServiceForInstall(port int) (*psutils.Process, error) {
|
||||
postgresCmd := exec.Command(
|
||||
getPostgresBinaryExecutablePath(),
|
||||
|
||||
@@ -188,7 +188,6 @@ func startDB(ctx context.Context, port int, listen StartListenType, invoker cons
|
||||
return res.SetError(err)
|
||||
}
|
||||
|
||||
// sometimes connecting to the db immediately after startup results in a dial error - so retry
|
||||
databaseName, err := getDatabaseName(ctx, port)
|
||||
if err != nil {
|
||||
return res.SetError(err)
|
||||
@@ -311,7 +310,7 @@ func startPostgresProcess(ctx context.Context, port int, listen StartListenType,
|
||||
func retrieveDatabaseNameFromService(ctx context.Context, port int) (string, error) {
|
||||
connection, err := createMaintenanceClient(ctx, port)
|
||||
if err != nil {
|
||||
return "", err
|
||||
return "", fmt.Errorf("failed to connect to the database: %v - please try again or reset your steampipe database", err)
|
||||
}
|
||||
defer connection.Close(ctx)
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ func LoadWorkspacePromptingForVariables(ctx context.Context) (*Workspace, *modco
|
||||
workspacePath := viper.GetString(constants.ArgModLocation)
|
||||
t := time.Now()
|
||||
defer func() {
|
||||
log.Printf("[TRANCE] Workspace load took %dms\n", time.Since(t).Milliseconds())
|
||||
log.Printf("[TRACE] Workspace load took %dms\n", time.Since(t).Milliseconds())
|
||||
}()
|
||||
w, errAndWarnings := Load(ctx, workspacePath)
|
||||
if errAndWarnings.GetError() == nil {
|
||||
|
||||
13
tests/manual_testing/service/start-kill.sh
Executable file
13
tests/manual_testing/service/start-kill.sh
Executable file
@@ -0,0 +1,13 @@
|
||||
for i in {1..10}; do
|
||||
echo "############################################################### STARTING"
|
||||
STEAMPIPE_LOG=trace steampipe service start
|
||||
ps -ef | grep steampipe
|
||||
STEAMPIPE_LOG=trace steampipe query "select pg_sleep(10)" &
|
||||
|
||||
echo "############################################################### KILLING"
|
||||
pkill -9 steampipe
|
||||
ps -ef | grep steampipe
|
||||
pkill -9 postgres
|
||||
ps -ef | grep steampipe
|
||||
echo "############################################################### DONE"
|
||||
done
|
||||
7
tests/manual_testing/service/start-stop.sh
Executable file
7
tests/manual_testing/service/start-stop.sh
Executable file
@@ -0,0 +1,7 @@
|
||||
for i in {1..10}; do
|
||||
echo "############################################################### STARTING"
|
||||
STEAMPIPE_LOG=trace steampipe service start
|
||||
echo "############################################################### STOPPING"
|
||||
STEAMPIPE_LOG=trace steampipe service stop
|
||||
echo "############################################################### DONE"
|
||||
done
|
||||
Reference in New Issue
Block a user