Add configurable timeout and retry logic for Qlik Sense API calls

- Add timeout, maxRetries, and retryDelayMilliseconds config options for both health check and proxy session APIs
- Implement exponential backoff retry logic with configurable parameters
- Change healthmetrics.js to use async/await pattern for better error handling
- Process health checks concurrently with Promise.allSettled for better performance
- Default timeout increased from 5s to 30s to handle slower connections (e.g. Wi-Fi)
- Default retry count set to 3 retries (up to 4 attempts in total) with a 1s initial delay that doubles on each retry
- Add comprehensive logging for retry attempts and final failures

Co-authored-by: mountaindude <1029262+mountaindude@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot]
2025-12-18 06:13:05 +00:00
parent f7679cb8c2
commit c444156871
3 changed files with 158 additions and 56 deletions

View File

@@ -549,6 +549,9 @@ Butler-SOS:
enableSessionExtract: true # Query unique user IDs of what users have sessions open (true/false)? enableSessionExtract: true # Query unique user IDs of what users have sessions open (true/false)?
# Items below are mandatory if enableSessionExtract=true # Items below are mandatory if enableSessionExtract=true
pollingInterval: 30000 # How often (milliseconds) should session data be polled? pollingInterval: 30000 # How often (milliseconds) should session data be polled?
timeoutMilliseconds: 30000 # HTTP request timeout (milliseconds) for proxy session API calls. Default: 30000
maxRetries: 3 # Number of times to retry failed API calls. Default: 3
retryDelayMilliseconds: 1000 # Initial delay between retries (milliseconds). Doubles for each retry. Default: 1000
excludeUser: # Optional blacklist of users that should be disregarded when it comes to session monitoring. excludeUser: # Optional blacklist of users that should be disregarded when it comes to session monitoring.
# Blacklist is only applied to data in InfluxDB. All session data will be sent to MQTT. # Blacklist is only applied to data in InfluxDB. All session data will be sent to MQTT.
# - directory: LAB # - directory: LAB
@@ -558,6 +561,9 @@ Butler-SOS:
serversToMonitor: serversToMonitor:
pollingInterval: 30000 # How often (milliseconds) should the healthcheck API be polled? pollingInterval: 30000 # How often (milliseconds) should the healthcheck API be polled?
timeoutMilliseconds: 30000 # HTTP request timeout (milliseconds) for health check API calls. Default: 30000
maxRetries: 3 # Number of times to retry failed API calls. Default: 3
retryDelayMilliseconds: 1000 # Initial delay between retries (milliseconds). Doubles for each retry. Default: 1000
# If false, Butler SOS will accept TLS certificates on the server without verifying them with the CA. # If false, Butler SOS will accept TLS certificates on the server without verifying them with the CA.
# If true, data will only be retrieved from the Sense server if that server's TLS cert verifies # If true, data will only be retrieved from the Sense server if that server's TLS cert verifies

View File

@@ -20,14 +20,16 @@ import { getCertificates, createCertificateOptions } from './cert-utils.js';
* *
* This function makes an HTTPS request to the Sense engine healthcheck API and * This function makes an HTTPS request to the Sense engine healthcheck API and
* distributes the data to configured destinations (MQTT, InfluxDB, New Relic, Prometheus). * distributes the data to configured destinations (MQTT, InfluxDB, New Relic, Prometheus).
* Implements retry logic with exponential backoff for transient network failures.
* *
* @param {string} serverName - The name of the server as defined in the config. * @param {string} serverName - The name of the server as defined in the config.
* @param {string} host - The hostname or IP address of the Sense server. * @param {string} host - The hostname or IP address of the Sense server.
* @param {object} tags - Tags/metadata to associate with the server metrics. * @param {object} tags - Tags/metadata to associate with the server metrics.
* @param {object|null} headers - Additional headers to include in the request. * @param {object|null} headers - Additional headers to include in the request.
* @returns {void} * @param {number} retryCount - Current retry attempt number (used internally for recursion). Defaults to 0.
* @returns {Promise<void>}
*/ */
export function getHealthStatsFromSense(serverName, host, tags, headers) { export async function getHealthStatsFromSense(serverName, host, tags, headers, retryCount = 0) {
globals.logger.debug(`HEALTH: URL=https://${host}/engine/healthcheck`); globals.logger.debug(`HEALTH: URL=https://${host}/engine/healthcheck`);
// Get certificate configuration options // Get certificate configuration options
@@ -49,6 +51,17 @@ export function getHealthStatsFromSense(serverName, host, tags, headers) {
rejectUnauthorized: globals.config.get('Butler-SOS.serversToMonitor.rejectUnauthorized'), rejectUnauthorized: globals.config.get('Butler-SOS.serversToMonitor.rejectUnauthorized'),
}); });
// Get timeout and retry settings from config with fallback defaults
const timeout = globals.config.has('Butler-SOS.serversToMonitor.timeoutMilliseconds')
? globals.config.get('Butler-SOS.serversToMonitor.timeoutMilliseconds')
: 30000;
const maxRetries = globals.config.has('Butler-SOS.serversToMonitor.maxRetries')
? globals.config.get('Butler-SOS.serversToMonitor.maxRetries')
: 3;
const retryDelay = globals.config.has('Butler-SOS.serversToMonitor.retryDelayMilliseconds')
? globals.config.get('Butler-SOS.serversToMonitor.retryDelayMilliseconds')
: 1000;
const requestSettings = { const requestSettings = {
url: `https://${host}/engine/healthcheck`, url: `https://${host}/engine/healthcheck`,
method: 'get', method: 'get',
@@ -57,7 +70,7 @@ export function getHealthStatsFromSense(serverName, host, tags, headers) {
'Content-Type': 'application/json', 'Content-Type': 'application/json',
}, },
httpsAgent, httpsAgent,
timeout: 5000, timeout,
maxRedirects: 5, maxRedirects: 5,
}; };
@@ -71,43 +84,71 @@ export function getHealthStatsFromSense(serverName, host, tags, headers) {
}); });
} }
axios try {
.request(requestSettings) const response = await axios.request(requestSettings);
.then((response) => {
if (response.status === 200) {
globals.logger.verbose(`HEALTH: Received ok response from ${tags.host}`);
globals.logger.debug(`HEALTH: ${JSON.stringify(response.data)}`);
// Post to MQTT if (response.status === 200) {
if (globals.config.get('Butler-SOS.mqttConfig.enable') === true) { globals.logger.verbose(`HEALTH: Received ok response from ${tags.host}`);
globals.logger.debug('HEALTH: Calling HEALTH metrics MQTT posting method'); globals.logger.debug(`HEALTH: ${JSON.stringify(response.data)}`);
postHealthToMQTT(host, tags.host, response.data);
}
// Post to Influxdb // Post to MQTT
if (globals.config.get('Butler-SOS.influxdbConfig.enable') === true) { if (globals.config.get('Butler-SOS.mqttConfig.enable') === true) {
globals.logger.debug('HEALTH: Calling HEALTH metrics Influxdb posting method'); globals.logger.debug('HEALTH: Calling HEALTH metrics MQTT posting method');
postHealthMetricsToInfluxdb(serverName, host, response.data, tags); postHealthToMQTT(host, tags.host, response.data);
}
// Post to New Relic
if (globals.config.get('Butler-SOS.newRelic.enable') === true) {
globals.logger.debug('HEALTH: Calling HEALTH metrics New Relic posting method');
postHealthMetricsToNewRelic(host, response.data, tags);
}
// Save latest available data for Prometheus
if (globals.config.get('Butler-SOS.prometheus.enable') === true) {
globals.logger.debug('HEALTH: Calling HEALTH metrics Prometheus method');
saveHealthMetricsToPrometheus(host, response.data, tags);
}
} }
})
.catch((err) => { // Post to Influxdb
globals.logger.error( if (globals.config.get('Butler-SOS.influxdbConfig.enable') === true) {
`HEALTH: Error when calling health check API for server '${serverName}' (${host}): ${globals.getErrorMessage(err)}` globals.logger.debug('HEALTH: Calling HEALTH metrics Influxdb posting method');
postHealthMetricsToInfluxdb(serverName, host, response.data, tags);
}
// Post to New Relic
if (globals.config.get('Butler-SOS.newRelic.enable') === true) {
globals.logger.debug('HEALTH: Calling HEALTH metrics New Relic posting method');
postHealthMetricsToNewRelic(host, response.data, tags);
}
// Save latest available data for Prometheus
if (globals.config.get('Butler-SOS.prometheus.enable') === true) {
globals.logger.debug('HEALTH: Calling HEALTH metrics Prometheus method');
saveHealthMetricsToPrometheus(host, response.data, tags);
}
}
} catch (err) {
// Check if we should retry based on error type and retry count
const shouldRetry =
retryCount < maxRetries &&
(err.code === 'ECONNABORTED' || // Timeout
err.code === 'ECONNRESET' || // Connection reset
err.code === 'ETIMEDOUT' || // Network timeout
err.code === 'ENOTFOUND' || // DNS lookup failed
err.code === 'ENETUNREACH'); // Network unreachable
if (shouldRetry) {
// Calculate exponential backoff delay
const delay = retryDelay * Math.pow(2, retryCount);
globals.logger.warn(
`HEALTH: Error calling health check API for server '${serverName}' (${host}): ${globals.getErrorMessage(err)}. Retrying in ${delay}ms (attempt ${retryCount + 1}/${maxRetries})...`
); );
});
// Wait before retrying
await new Promise((resolve) => setTimeout(resolve, delay));
// Recursive retry
return getHealthStatsFromSense(serverName, host, tags, headers, retryCount + 1);
}
// Final error after all retries exhausted or non-retryable error
globals.logger.error(
`HEALTH: Error when calling health check API for server '${serverName}' (${host}): ${globals.getErrorMessage(err)}`
);
if (retryCount > 0) {
globals.logger.error(
`HEALTH: Failed after ${retryCount} ${retryCount === 1 ? 'retry' : 'retries'}`
);
}
}
} }
/** /**
@@ -120,29 +161,30 @@ export function getHealthStatsFromSense(serverName, host, tags, headers) {
*/ */
export function setupHealthMetricsTimer() { export function setupHealthMetricsTimer() {
// Configure timer for getting healthcheck data // Configure timer for getting healthcheck data
setInterval(() => { setInterval(async () => {
globals.logger.verbose('HEALTH: Event started: Statistics collection'); globals.logger.verbose('HEALTH: Event started: Statistics collection');
globals.serverList.forEach((server) => { // Process all servers concurrently with error handling
globals.logger.verbose(`HEALTH: Getting stats for server: ${server.serverName}`); const healthCheckPromises = globals.serverList.map(async (server) => {
globals.logger.debug(`HEALTH: Server details: ${JSON.stringify(server)}`); try {
globals.logger.verbose(`HEALTH: Getting stats for server: ${server.serverName}`);
globals.logger.debug(`HEALTH: Server details: ${JSON.stringify(server)}`);
// Get per-server tags // Get per-server tags
const tags = getServerTags(globals.logger, server); const tags = getServerTags(globals.logger, server);
// Save tags to global variable. // Get per-server headers
// Add a new object to the array, with properties host andd tags. const headers = getServerHeaders(server);
// The tags property is an array with all the tags for the server.
// Each tag object has a name and a value.
// globals.serverTags.push({
// host: server.host,
// tags,
// });
// Get per-server headers await getHealthStatsFromSense(server.serverName, server.host, tags, headers);
const headers = getServerHeaders(server); } catch (err) {
globals.logger.error(
getHealthStatsFromSense(server.serverName, server.host, tags, headers); `HEALTH: Unexpected error processing health stats for server '${server.serverName}': ${globals.getErrorMessage(err)}`
);
}
}); });
// Wait for all health checks to complete
await Promise.allSettled(healthCheckPromises);
}, globals.config.get('Butler-SOS.serversToMonitor.pollingInterval')); }, globals.config.get('Butler-SOS.serversToMonitor.pollingInterval'));
} }

View File

@@ -206,14 +206,22 @@ function prepUserSessionMetrics(serverName, host, virtualProxy, body, tags) {
* This function makes an API call to the Qlik Sense Proxy API to get information about * This function makes an API call to the Qlik Sense Proxy API to get information about
* active user sessions. It then processes this data and sends it to configured destinations * active user sessions. It then processes this data and sends it to configured destinations
* (MQTT, InfluxDB, New Relic, Prometheus). * (MQTT, InfluxDB, New Relic, Prometheus).
* Implements retry logic with exponential backoff for transient network failures.
* *
* @param {string} serverName - Name of the Qlik Sense server * @param {string} serverName - Name of the Qlik Sense server
* @param {string} host - Host name or IP of the Qlik Sense server * @param {string} host - Host name or IP of the Qlik Sense server
* @param {string} virtualProxy - Virtual proxy prefix * @param {string} virtualProxy - Virtual proxy prefix
* @param {object} influxTags - Tags to associate with metrics in InfluxDB * @param {object} influxTags - Tags to associate with metrics in InfluxDB
* @param {number} retryCount - Current retry attempt number (used internally for recursion). Defaults to 0.
* @returns {Promise<void>} Promise that resolves when the operation is complete * @returns {Promise<void>} Promise that resolves when the operation is complete
*/ */
export async function getProxySessionStatsFromSense(serverName, host, virtualProxy, influxTags) { export async function getProxySessionStatsFromSense(
serverName,
host,
virtualProxy,
influxTags,
retryCount = 0
) {
// Current user sessions are retrieved using this API: // Current user sessions are retrieved using this API:
// https://help.qlik.com/en-US/sense-developer/February2021/Subsystems/ProxyServiceAPI/Content/Sense_ProxyServiceAPI/ProxyServiceAPI-Proxy-API.htm // https://help.qlik.com/en-US/sense-developer/February2021/Subsystems/ProxyServiceAPI/Content/Sense_ProxyServiceAPI/ProxyServiceAPI-Proxy-API.htm
@@ -236,6 +244,17 @@ export async function getProxySessionStatsFromSense(serverName, host, virtualPro
rejectUnauthorized: globals.config.get('Butler-SOS.serversToMonitor.rejectUnauthorized'), rejectUnauthorized: globals.config.get('Butler-SOS.serversToMonitor.rejectUnauthorized'),
}); });
// Get timeout and retry settings from config with fallback defaults
const timeout = globals.config.has('Butler-SOS.userSessions.timeoutMilliseconds')
? globals.config.get('Butler-SOS.userSessions.timeoutMilliseconds')
: 30000;
const maxRetries = globals.config.has('Butler-SOS.userSessions.maxRetries')
? globals.config.get('Butler-SOS.userSessions.maxRetries')
: 3;
const retryDelay = globals.config.has('Butler-SOS.userSessions.retryDelayMilliseconds')
? globals.config.get('Butler-SOS.userSessions.retryDelayMilliseconds')
: 1000;
const vP = virtualProxy === '/' ? '' : `${virtualProxy}`; const vP = virtualProxy === '/' ? '' : `${virtualProxy}`;
const requestSettings = { const requestSettings = {
url: `https://${host}/qps${vP}/session?Xrfkey=abcdefghij987654`, url: `https://${host}/qps${vP}/session?Xrfkey=abcdefghij987654`,
@@ -247,7 +266,7 @@ export async function getProxySessionStatsFromSense(serverName, host, virtualPro
XVirtualProxy: virtualProxy, XVirtualProxy: virtualProxy,
}, },
httpsAgent, httpsAgent,
timeout: 5000, timeout,
maxRedirects: 5, maxRedirects: 5,
}; };
@@ -316,9 +335,44 @@ export async function getProxySessionStatsFromSense(serverName, host, virtualPro
} }
} }
} catch (err) { } catch (err) {
// Check if we should retry based on error type and retry count
const shouldRetry =
retryCount < maxRetries &&
(err.code === 'ECONNABORTED' || // Timeout
err.code === 'ECONNRESET' || // Connection reset
err.code === 'ETIMEDOUT' || // Network timeout
err.code === 'ENOTFOUND' || // DNS lookup failed
err.code === 'ENETUNREACH'); // Network unreachable
if (shouldRetry) {
// Calculate exponential backoff delay
const delay = retryDelay * Math.pow(2, retryCount);
globals.logger.warn(
`PROXY SESSIONS: Error calling proxy session API for server '${serverName}' (${host}), virtual proxy '${virtualProxy}': ${globals.getErrorMessage(err)}. Retrying in ${delay}ms (attempt ${retryCount + 1}/${maxRetries})...`
);
// Wait before retrying
await new Promise((resolve) => setTimeout(resolve, delay));
// Recursive retry
return getProxySessionStatsFromSense(
serverName,
host,
virtualProxy,
influxTags,
retryCount + 1
);
}
// Final error after all retries exhausted or non-retryable error
globals.logger.error( globals.logger.error(
`PROXY SESSIONS: Error when calling proxy session API for server '${serverName}' (${host}), virtual proxy '${virtualProxy}': ${globals.getErrorMessage(err)}` `PROXY SESSIONS: Error when calling proxy session API for server '${serverName}' (${host}), virtual proxy '${virtualProxy}': ${globals.getErrorMessage(err)}`
); );
if (retryCount > 0) {
globals.logger.error(
`PROXY SESSIONS: Failed after ${retryCount} ${retryCount === 1 ? 'retry' : 'retries'}`
);
}
} }
} }