mirror of
https://github.com/Lissy93/web-check.git
synced 2026-05-12 21:00:38 -04:00
ref: Reliability improvements and fixes
- Sitemap endpoint now recursively expands sitemap-index files - Fixes #165 - Strips :port from target URLs in get-ip, dns, dns-server, ports, mail-config - Fixes #203 - Configurable trust proxy (TRUST_PROXY env) so app works behind Traefik/nginx - Fixes #157 - Tranco rank now correctly says "top 1 million" (was "100 million") - Fixes #257 - Adds engines.node ">=20" so Vercel picks a supported runtime - Re #212 - Raises Vercel maxDuration from 10s to 60s, cutting most 504 timeouts - Re #251 - Re #287 - Bumps axios 1.4.8 to 1.16, closing 4 high-severity SSRF/DoS CVEs - Re #289 - Fixes mail-config crash where dns module was awaited as if promise-based - Adds reusable structured logging util for the API - Bumps a whole bunch of deps, and resolves lots of open npm CVEs
This commit is contained in:
@@ -2,6 +2,9 @@
|
||||
# Be sure to uncomment any line you populate
|
||||
# Everything is optional, but some features won't work without external API access
|
||||
|
||||
# Kill switch for the public/hosted instance
|
||||
# VITE_DISABLE_EVERYTHING='false'
|
||||
|
||||
# API Keys for external services (backend)
|
||||
GOOGLE_CLOUD_API_KEY=''
|
||||
TORRENT_IP_API_KEY=''
|
||||
@@ -11,10 +14,8 @@ URL_SCAN_API_KEY=''
|
||||
TRANCO_USERNAME=''
|
||||
TRANCO_API_KEY=''
|
||||
CLOUDMERSIVE_API_KEY=''
|
||||
|
||||
# API Keys for external services (frontend)
|
||||
REACT_APP_SHODAN_API_KEY=''
|
||||
REACT_APP_WHO_API_KEY=''
|
||||
SHODAN_API_KEY=''
|
||||
WHO_API_KEY=''
|
||||
|
||||
# Configuration settings
|
||||
# CHROME_PATH='/usr/bin/chromium' # The path to the Chromium executable
|
||||
@@ -25,3 +26,6 @@ REACT_APP_WHO_API_KEY=''
|
||||
# API_ENABLE_RATE_LIMIT='true' # Enable rate limiting for the API
|
||||
# REACT_APP_API_ENDPOINT='/api' # The endpoint for the API (can be local or remote)
|
||||
# ENABLE_ANALYTICS='false' # Enable Plausible hit counter for the frontend
|
||||
# BOSS_SERVER='false' # Marketing homepage (only used by official instance)
|
||||
# TRUST_PROXY='1' # Set if running behind a reverse proxy (Traefik, nginx, etc).
|
||||
# Use a number of hops (e.g. '1'), 'true', or a CIDR list.
|
||||
2
.github/README.md
vendored
2
.github/README.md
vendored
@@ -660,7 +660,7 @@ This is useful for understanding the history of a site, and how it has changed o
|
||||
<img width="300" src="https://pixelflare.cc/alicia/web-check/wc-rank" align="right" />
|
||||
|
||||
###### Description
|
||||
This check shows the global rank of the requested site. This is only accurate for websites which are in the top 100 million list. We're using data from the Tranco project (see below), which collates the top sites on the web from Umbrella, Majestic, Quantcast, the Chrome User Experience Report and Cloudflare Radar.
|
||||
This check shows the global rank of the requested site. This is only accurate for websites which are in the top 1 million list. We're using data from the Tranco project (see below), which collates the top sites on the web from Umbrella, Majestic, Quantcast, the Chrome User Experience Report and Cloudflare Radar.
|
||||
|
||||
###### Use Cases
|
||||
Knowing a website's overall global rank can be useful for understanding the scale of the site, and for comparing it to other sites. It can also be useful for understanding the relative popularity of a site, and for identifying potential trends.
|
||||
|
||||
28
api/_common/logger.js
Normal file
28
api/_common/logger.js
Normal file
@@ -0,0 +1,28 @@
|
||||
// Lightweight structured logger. Honours LOG_LEVEL env (debug, info, warn, error, silent).
|
||||
const LEVELS = { debug: 10, info: 20, warn: 30, error: 40, silent: 99 };
|
||||
const THRESHOLD = LEVELS[(process.env.LOG_LEVEL || 'info').toLowerCase()] ?? LEVELS.info;
|
||||
|
||||
const fmt = (level, scope, msg, extra) => {
|
||||
const ts = new Date().toISOString();
|
||||
const tag = scope ? `[${scope}] ` : '';
|
||||
const body = typeof msg === 'string' ? msg : JSON.stringify(msg);
|
||||
const tail = extra === undefined
|
||||
? ''
|
||||
: ` ${typeof extra === 'string' ? extra : JSON.stringify(extra)}`;
|
||||
return `${ts} ${level.toUpperCase().padEnd(5)} ${tag}${body}${tail}`;
|
||||
};
|
||||
|
||||
const write = (level, stream, scope, msg, extra) => {
|
||||
if (LEVELS[level] < THRESHOLD) return;
|
||||
stream.write(fmt(level, scope, msg, extra) + '\n');
|
||||
};
|
||||
|
||||
// Returns a logger pinned to a scope (e.g. an API route name).
|
||||
export const createLogger = (scope) => ({
|
||||
debug: (msg, extra) => write('debug', process.stdout, scope, msg, extra),
|
||||
info: (msg, extra) => write('info', process.stdout, scope, msg, extra),
|
||||
warn: (msg, extra) => write('warn', process.stderr, scope, msg, extra),
|
||||
error: (msg, extra) => write('error', process.stderr, scope, msg, extra),
|
||||
});
|
||||
|
||||
export default createLogger;
|
||||
18
api/_common/parse-target.js
Normal file
18
api/_common/parse-target.js
Normal file
@@ -0,0 +1,18 @@
|
||||
// Parse a user-supplied target into a normalised form.
// Strips protocol/port/path so DNS-touching endpoints get a bare hostname.
//
// Accepts a full URL ("https://example.com:8443/x"), a host with optional port
// ("example.com:8080"), or a bracketed IPv6 literal ("[::1]:3000").
// Returns { hostname, port, protocol, pathname, href }; `port` is null when the
// URL has no explicit (non-default) port.
// Throws Error('No target provided') for empty input and Error('Invalid URL: …')
// when the value cannot be parsed.
export const parseTarget = (input) => {
  if (!input) throw new Error('No target provided');
  // Surrounding whitespace would otherwise end up inside the host and fail parsing.
  const raw = String(input).trim();
  if (!raw) throw new Error('No target provided');

  // Only prepend https:// when the input has no scheme of its own. Prepending it to
  // e.g. "ftp://example.com" would make the parser treat "ftp" as the hostname.
  const hasScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(raw);
  const normalised = hasScheme ? raw : `https://${raw}`;

  let u;
  try {
    u = new URL(normalised);
  } catch {
    throw new Error(`Invalid URL: ${input}`);
  }

  return {
    // URL.hostname keeps the [] around IPv6 literals; resolvers and sockets want the bare address.
    hostname: u.hostname.replace(/^\[|\]$/g, ''),
    port: u.port || null,
    protocol: u.protocol,
    pathname: u.pathname || '/',
    href: u.href,
  };
};
|
||||
|
||||
export default parseTarget;
|
||||
@@ -1,10 +1,11 @@
|
||||
import { promises as dnsPromises, lookup } from 'dns';
|
||||
import { promises as dnsPromises } from 'dns';
|
||||
import axios from 'axios';
|
||||
import middleware from './_common/middleware.js';
|
||||
import { parseTarget } from './_common/parse-target.js';
|
||||
|
||||
const dnsHandler = async (url) => {
|
||||
try {
|
||||
const domain = url.replace(/^(?:https?:\/\/)?/i, "");
|
||||
const { hostname: domain } = parseTarget(url);
|
||||
const addresses = await dnsPromises.resolve4(domain);
|
||||
const results = await Promise.all(addresses.map(async (address) => {
|
||||
const hostname = await dnsPromises.reverse(address).catch(() => null);
|
||||
@@ -22,19 +23,7 @@ const dnsHandler = async (url) => {
|
||||
};
|
||||
}));
|
||||
|
||||
// let dohMozillaSupport = false;
|
||||
// try {
|
||||
// const mozillaList = await axios.get('https://firefox.settings.services.mozilla.com/v1/buckets/security-state/collections/onecrl/records');
|
||||
// dohMozillaSupport = results.some(({ hostname }) => mozillaList.data.data.some(({ id }) => id.includes(hostname)));
|
||||
// } catch (error) {
|
||||
// console.error(error);
|
||||
// }
|
||||
|
||||
return {
|
||||
domain,
|
||||
dns: results,
|
||||
// dohMozillaSupport,
|
||||
};
|
||||
return { domain, dns: results };
|
||||
} catch (error) {
|
||||
throw new Error(`An error occurred while resolving DNS. ${error.message}`); // This will be caught and handled by the commonMiddleware
|
||||
}
|
||||
|
||||
@@ -1,14 +1,10 @@
|
||||
import dns from 'dns';
|
||||
import util from 'util';
|
||||
import middleware from './_common/middleware.js';
|
||||
import { parseTarget } from './_common/parse-target.js';
|
||||
|
||||
const dnsHandler = async (url) => {
|
||||
let hostname = url;
|
||||
|
||||
// Handle URLs by extracting hostname
|
||||
if (hostname.startsWith('http://') || hostname.startsWith('https://')) {
|
||||
hostname = new URL(hostname).hostname;
|
||||
}
|
||||
const { hostname } = parseTarget(url);
|
||||
|
||||
try {
|
||||
const lookupPromise = util.promisify(dns.lookup);
|
||||
|
||||
@@ -1,23 +1,18 @@
|
||||
import dns from 'dns';
|
||||
import middleware from './_common/middleware.js';
|
||||
import { parseTarget } from './_common/parse-target.js';
|
||||
|
||||
const lookupAsync = (address) => {
|
||||
return new Promise((resolve, reject) => {
|
||||
dns.lookup(address, (err, ip, family) => {
|
||||
if (err) {
|
||||
reject(err);
|
||||
} else {
|
||||
resolve({ ip, family });
|
||||
}
|
||||
});
|
||||
// Resolve the IP address for the target hostname.
|
||||
const lookupAsync = (address) => new Promise((resolve, reject) => {
|
||||
dns.lookup(address, (err, ip, family) => {
|
||||
if (err) reject(err); else resolve({ ip, family });
|
||||
});
|
||||
};
|
||||
});
|
||||
|
||||
const ipHandler = async (url) => {
|
||||
const address = url.replaceAll('https://', '').replaceAll('http://', '');
|
||||
return await lookupAsync(address);
|
||||
const { hostname } = parseTarget(url);
|
||||
return await lookupAsync(hostname);
|
||||
};
|
||||
|
||||
|
||||
export const handler = middleware(ipHandler);
|
||||
export default handler;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import axios from 'axios';
|
||||
import cheerio from 'cheerio';
|
||||
import * as cheerio from 'cheerio';
|
||||
import urlLib from 'url';
|
||||
import middleware from './_common/middleware.js';
|
||||
|
||||
|
||||
@@ -1,12 +1,10 @@
|
||||
import dns from 'dns';
|
||||
import URL from 'url-parse';
|
||||
import dns from 'dns/promises';
|
||||
import middleware from './_common/middleware.js';
|
||||
|
||||
// TODO: Fix.
|
||||
import { parseTarget } from './_common/parse-target.js';
|
||||
|
||||
const mailConfigHandler = async (url, event, context) => {
|
||||
try {
|
||||
const domain = new URL(url).hostname || new URL(url).pathname;
|
||||
const { hostname: domain } = parseTarget(url);
|
||||
|
||||
// Get MX records
|
||||
const mxRecords = await dns.resolveMx(domain);
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import net from 'net';
|
||||
import middleware from './_common/middleware.js';
|
||||
import { parseTarget } from './_common/parse-target.js';
|
||||
|
||||
// A list of commonly used ports.
|
||||
const DEFAULT_PORTS_TO_CHECK = [
|
||||
@@ -40,7 +41,7 @@ async function checkPort(port, domain) {
|
||||
}
|
||||
|
||||
const portsHandler = async (url, event, context) => {
|
||||
const domain = url.replace(/(^\w+:|^)\/\//, '');
|
||||
const { hostname: domain } = parseTarget(url);
|
||||
|
||||
const delay = ms => new Promise(res => setTimeout(res, ms));
|
||||
const timeout = delay(9000);
|
||||
|
||||
@@ -13,7 +13,7 @@ const rankHandler = async (url) => {
|
||||
`https://tranco-list.eu/api/ranks/domain/${domain}`, { timeout: 5000 }, auth,
|
||||
);
|
||||
if (!response.data || !response.data.ranks || response.data.ranks.length === 0) {
|
||||
return { skipped: `Skipping, as ${domain} isn't ranked in the top 100 million sites yet.`};
|
||||
return { skipped: `Skipping, as ${domain} isn't ranked in the top 1 million sites yet.`};
|
||||
}
|
||||
return response.data;
|
||||
} catch (error) {
|
||||
|
||||
@@ -1,23 +1,20 @@
|
||||
import puppeteer from 'puppeteer-core';
|
||||
import chromium from 'chrome-aws-lambda';
|
||||
import middleware from './_common/middleware.js';
|
||||
import { randomUUID } from 'crypto';
|
||||
import { execFile } from 'child_process';
|
||||
import { promises as fs } from 'fs';
|
||||
import path from 'path';
|
||||
import pkg from 'uuid';
|
||||
const { v4: uuidv4 } = pkg;
|
||||
import middleware from './_common/middleware.js';
|
||||
import { createLogger } from './_common/logger.js';
|
||||
|
||||
// Helper function for direct chromium screenshot as fallback
|
||||
const log = createLogger('screenshot');
|
||||
|
||||
// Capture a screenshot via the system Chromium binary; faster cold-start than puppeteer.
|
||||
const directChromiumScreenshot = async (url) => {
|
||||
console.log(`[DIRECT-SCREENSHOT] Starting direct screenshot process for URL: ${url}`);
|
||||
|
||||
// Create a tmp filename
|
||||
const tmpDir = '/tmp';
|
||||
const uuid = uuidv4();
|
||||
const screenshotPath = path.join(tmpDir, `screenshot-${uuid}.png`);
|
||||
|
||||
console.log(`[DIRECT-SCREENSHOT] Will save screenshot to: ${screenshotPath}`);
|
||||
|
||||
const screenshotPath = path.join(tmpDir, `screenshot-${randomUUID()}.png`);
|
||||
log.debug(`direct method, saving to ${screenshotPath}`);
|
||||
|
||||
return new Promise((resolve, reject) => {
|
||||
const chromePath = process.env.CHROME_PATH || '/usr/bin/chromium';
|
||||
const args = [
|
||||
@@ -25,119 +22,67 @@ const directChromiumScreenshot = async (url) => {
|
||||
'--disable-gpu',
|
||||
'--no-sandbox',
|
||||
`--screenshot=${screenshotPath}`,
|
||||
url
|
||||
url,
|
||||
];
|
||||
|
||||
console.log(`[DIRECT-SCREENSHOT] Executing: ${chromePath} ${args.join(' ')}`);
|
||||
|
||||
execFile(chromePath, args, async (error, stdout, stderr) => {
|
||||
if (error) {
|
||||
console.error(`[DIRECT-SCREENSHOT] Chromium error: ${error.message}`);
|
||||
return reject(error);
|
||||
}
|
||||
|
||||
execFile(chromePath, args, async (error) => {
|
||||
if (error) return reject(error);
|
||||
try {
|
||||
// Read the screenshot file
|
||||
const screenshotData = await fs.readFile(screenshotPath);
|
||||
console.log(`[DIRECT-SCREENSHOT] Screenshot read successfully`);
|
||||
|
||||
// Convert to base64
|
||||
const base64Data = screenshotData.toString('base64');
|
||||
|
||||
const buf = await fs.readFile(screenshotPath);
|
||||
await fs.unlink(screenshotPath).catch(err =>
|
||||
console.warn(`[DIRECT-SCREENSHOT] Failed to delete temp file: ${err.message}`)
|
||||
log.warn(`temp cleanup failed: ${err.message}`)
|
||||
);
|
||||
|
||||
resolve(base64Data);
|
||||
resolve(buf.toString('base64'));
|
||||
} catch (readError) {
|
||||
console.error(`[DIRECT-SCREENSHOT] Failed reading screenshot: ${readError.message}`);
|
||||
reject(readError);
|
||||
}
|
||||
});
|
||||
});
|
||||
};
|
||||
|
||||
const screenshotHandler = async (targetUrl) => {
|
||||
console.log(`[SCREENSHOT] Request received for URL: ${targetUrl}`);
|
||||
|
||||
if (!targetUrl) {
|
||||
console.error('[SCREENSHOT] URL is missing from queryStringParameters');
|
||||
throw new Error('URL is missing from queryStringParameters');
|
||||
}
|
||||
|
||||
if (!targetUrl.startsWith('http://') && !targetUrl.startsWith('https://')) {
|
||||
targetUrl = 'http://' + targetUrl;
|
||||
}
|
||||
|
||||
try {
|
||||
new URL(targetUrl);
|
||||
} catch (error) {
|
||||
console.error(`[SCREENSHOT] URL provided is invalid: ${targetUrl}`);
|
||||
throw new Error('URL provided is invalid');
|
||||
}
|
||||
|
||||
// First try direct Chromium
|
||||
try {
|
||||
console.log(`[SCREENSHOT] Using direct Chromium method for URL: ${targetUrl}`);
|
||||
const base64Screenshot = await directChromiumScreenshot(targetUrl);
|
||||
console.log(`[SCREENSHOT] Direct screenshot successful`);
|
||||
return { image: base64Screenshot };
|
||||
} catch (directError) {
|
||||
console.error(`[SCREENSHOT] Direct screenshot method failed: ${directError.message}`);
|
||||
console.log(`[SCREENSHOT] Falling back to puppeteer method...`);
|
||||
}
|
||||
|
||||
// fall back puppeteer
|
||||
// Fallback path that uses puppeteer with the bundled chrome-aws-lambda binary.
|
||||
const puppeteerScreenshot = async (targetUrl) => {
|
||||
let browser = null;
|
||||
try {
|
||||
console.log(`[SCREENSHOT] Launching puppeteer browser`);
|
||||
browser = await puppeteer.launch({
|
||||
args: [...chromium.args, '--no-sandbox'], // Add --no-sandbox flag
|
||||
args: [...chromium.args, '--no-sandbox'],
|
||||
defaultViewport: { width: 800, height: 600 },
|
||||
executablePath: process.env.CHROME_PATH || '/usr/bin/chromium',
|
||||
headless: true,
|
||||
ignoreHTTPSErrors: true,
|
||||
ignoreDefaultArgs: ['--disable-extensions'],
|
||||
});
|
||||
|
||||
console.log(`[SCREENSHOT] Creating new page`);
|
||||
let page = await browser.newPage();
|
||||
|
||||
console.log(`[SCREENSHOT] Setting page preferences`);
|
||||
const page = await browser.newPage();
|
||||
await page.emulateMediaFeatures([{ name: 'prefers-color-scheme', value: 'dark' }]);
|
||||
page.setDefaultNavigationTimeout(8000);
|
||||
|
||||
console.log(`[SCREENSHOT] Navigating to URL: ${targetUrl}`);
|
||||
await page.goto(targetUrl, { waitUntil: 'domcontentloaded' });
|
||||
|
||||
console.log(`[SCREENSHOT] Checking if body element exists`);
|
||||
await page.evaluate(() => {
|
||||
const selector = 'body';
|
||||
return new Promise((resolve, reject) => {
|
||||
const element = document.querySelector(selector);
|
||||
if (!element) {
|
||||
reject(new Error(`Error: No element found with selector: ${selector}`));
|
||||
}
|
||||
resolve();
|
||||
});
|
||||
if (!document.querySelector('body')) {
|
||||
throw new Error('No body element found on the page');
|
||||
}
|
||||
});
|
||||
|
||||
console.log(`[SCREENSHOT] Taking screenshot`);
|
||||
const screenshotBuffer = await page.screenshot();
|
||||
|
||||
console.log(`[SCREENSHOT] Converting screenshot to base64`);
|
||||
const base64Screenshot = screenshotBuffer.toString('base64');
|
||||
|
||||
console.log(`[SCREENSHOT] Screenshot complete, returning image`);
|
||||
return { image: base64Screenshot };
|
||||
} catch (error) {
|
||||
console.error(`[SCREENSHOT] Puppeteer screenshot failed: ${error.message}`);
|
||||
throw error;
|
||||
const buffer = await page.screenshot();
|
||||
return buffer.toString('base64');
|
||||
} finally {
|
||||
if (browser !== null) {
|
||||
console.log(`[SCREENSHOT] Closing browser`);
|
||||
await browser.close();
|
||||
}
|
||||
if (browser) await browser.close().catch(() => {});
|
||||
}
|
||||
};
|
||||
|
||||
// Screenshot entry point. Validates the target, tries the direct Chromium capture
// first, and falls back to puppeteer when that fails.
// Resolves to { image: <base64-encoded screenshot> }.
// Throws when the URL is missing or invalid, or when both capture methods fail.
const screenshotHandler = async (targetUrl) => {
  if (!targetUrl) throw new Error('URL is missing from queryStringParameters');

  // A bare host such as "example.com" has no scheme; treat it as http://.
  const candidate = /^[a-z][a-z0-9+.-]*:\/\//i.test(targetUrl)
    ? targetUrl
    : `http://${targetUrl}`;

  let parsedUrl;
  try {
    parsedUrl = new URL(candidate);
  } catch {
    throw new Error('URL provided is invalid');
  }
  // The value is handed to a browser, so only web URLs are allowed through;
  // schemes such as file: or chrome: would expose resources local to the server.
  if (parsedUrl.protocol !== 'http:' && parsedUrl.protocol !== 'https:') {
    throw new Error('URL provided is invalid');
  }
  const safeUrl = parsedUrl.href;

  log.debug(`request received: ${safeUrl}`);

  try {
    return { image: await directChromiumScreenshot(safeUrl) };
  } catch (directError) {
    // Not fatal: the puppeteer path below is the fallback.
    log.warn(`direct chromium failed, falling back to puppeteer: ${directError.message}`);
  }

  try {
    return { image: await puppeteerScreenshot(safeUrl) };
  } catch (error) {
    log.error(`puppeteer screenshot failed: ${error.message}`);
    throw error;
  }
};
|
||||
|
||||
|
||||
@@ -2,52 +2,72 @@ import axios from 'axios';
|
||||
import xml2js from 'xml2js';
|
||||
import middleware from './_common/middleware.js';
|
||||
|
||||
const HARD_TIMEOUT = 5000;
|
||||
const MAX_DEPTH = 3;
|
||||
const MAX_CHILD_SITEMAPS = 25;
|
||||
|
||||
// Download one sitemap document (bounded by HARD_TIMEOUT) and return the
// promise of its xml2js-parsed form.
const fetchSitemap = async (sitemapUrl) => {
  const { data } = await axios.get(sitemapUrl, { timeout: HARD_TIMEOUT });
  const parser = new xml2js.Parser();
  return parser.parseStringPromise(data);
};
|
||||
|
||||
// Find a sitemap URL listed in robots.txt as a fallback when /sitemap.xml is missing.
// Fetches `${baseUrl}/robots.txt` and returns the URL from the first usable
// "Sitemap:" line, or null when there is none. Request failures propagate to the caller.
const findSitemapInRobots = async (baseUrl) => {
  const robots = await axios.get(`${baseUrl}/robots.txt`, { timeout: HARD_TIMEOUT });
  // Anything other than a text body cannot contain directives.
  const body = typeof robots.data === 'string' ? robots.data : '';
  for (const line of body.split(/\r?\n/)) {
    // Case-insensitive field name, optional indentation, and optional whitespace
    // around the colon ("Sitemap:https://…" is valid). \S+ stops before any
    // trailing whitespace or comment.
    const match = /^\s*sitemap\s*:\s*(\S+)/i.exec(line);
    // A "Sitemap:" line with no value is skipped so a later valid line can still match.
    if (match) return match[1];
  }
  return null;
};
|
||||
|
||||
// Recursively expand a sitemap-index into its child url sets.
// A document that is not an index, or any document once MAX_DEPTH is reached, is
// returned unchanged. Otherwise up to MAX_CHILD_SITEMAPS child locations are
// fetched, each child is expanded in turn, and their <url> entries are merged.
const expandSitemap = async (parsed, depth) => {
  const index = parsed?.sitemapindex;
  if (!index?.sitemap || depth >= MAX_DEPTH) return parsed;

  const children = index.sitemap
    .map((entry) => entry?.loc?.[0])
    .filter(Boolean)
    .slice(0, MAX_CHILD_SITEMAPS);

  // A failed child becomes an { error, loc } marker instead of rejecting the whole batch.
  const fetchChild = (loc) =>
    fetchSitemap(loc).catch((err) => ({ error: err.message, loc }));
  const fetched = await Promise.all(children.map(fetchChild));

  // Error markers are passed through untouched; real documents are expanded one level deeper.
  const expandChild = (child) =>
    (child?.error ? child : expandSitemap(child, depth + 1));
  const expanded = await Promise.all(fetched.map(expandChild));

  const urls = expanded.flatMap((child) => child?.urlset?.url || []);
  const urlset = urls.length ? { url: urls } : undefined;

  return {
    sitemapindex: index,
    urlset,
    sources: children,
  };
};
|
||||
|
||||
const sitemapHandler = async (url) => {
|
||||
let sitemapUrl = `${url}/sitemap.xml`;
|
||||
|
||||
const hardTimeOut = 5000;
|
||||
|
||||
try {
|
||||
// Try to fetch sitemap directly
|
||||
let sitemapRes;
|
||||
let parsed;
|
||||
try {
|
||||
sitemapRes = await axios.get(sitemapUrl, { timeout: hardTimeOut });
|
||||
parsed = await fetchSitemap(sitemapUrl);
|
||||
} catch (error) {
|
||||
if (error.response && error.response.status === 404) {
|
||||
// If sitemap not found, try to fetch it from robots.txt
|
||||
const robotsRes = await axios.get(`${url}/robots.txt`, { timeout: hardTimeOut });
|
||||
const robotsTxt = robotsRes.data.split('\n');
|
||||
|
||||
for (let line of robotsTxt) {
|
||||
if (line.toLowerCase().startsWith('sitemap:')) {
|
||||
sitemapUrl = line.split(' ')[1].trim();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!sitemapUrl) {
|
||||
return { skipped: 'No sitemap found' };
|
||||
}
|
||||
|
||||
sitemapRes = await axios.get(sitemapUrl, { timeout: hardTimeOut });
|
||||
const robotsSitemap = await findSitemapInRobots(url);
|
||||
if (!robotsSitemap) return { skipped: 'No sitemap found' };
|
||||
sitemapUrl = robotsSitemap;
|
||||
parsed = await fetchSitemap(sitemapUrl);
|
||||
} else {
|
||||
throw error; // If other error, throw it
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
const parser = new xml2js.Parser();
|
||||
const sitemap = await parser.parseStringPromise(sitemapRes.data);
|
||||
|
||||
return sitemap;
|
||||
return await expandSitemap(parsed, 0);
|
||||
} catch (error) {
|
||||
if (error.code === 'ECONNABORTED') {
|
||||
return { error: `Request timed-out after ${hardTimeOut}ms` };
|
||||
} else {
|
||||
return { error: error.message };
|
||||
return { error: `Request timed-out after ${HARD_TIMEOUT}ms` };
|
||||
}
|
||||
return { error: error.message };
|
||||
}
|
||||
};
|
||||
|
||||
export const handler = middleware(sitemapHandler);
|
||||
export default handler;
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import axios from 'axios';
|
||||
import cheerio from 'cheerio';
|
||||
import * as cheerio from 'cheerio';
|
||||
import middleware from './_common/middleware.js';
|
||||
|
||||
const socialTagsHandler = async (url) => {
|
||||
|
||||
@@ -2,6 +2,9 @@ import net from 'net';
|
||||
import psl from 'psl';
|
||||
import axios from 'axios';
|
||||
import middleware from './_common/middleware.js';
|
||||
import { createLogger } from './_common/logger.js';
|
||||
|
||||
const log = createLogger('whois');
|
||||
|
||||
const getBaseDomain = (url) => {
|
||||
let protocol = '';
|
||||
@@ -78,7 +81,7 @@ const fetchFromMyAPI = async (hostname) => {
|
||||
});
|
||||
return response.data;
|
||||
} catch (error) {
|
||||
console.error('Error fetching data from your API:', error.message);
|
||||
log.error(`whois proxy fetch failed: ${error.message}`);
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
86
package.json
86
package.json
@@ -3,6 +3,9 @@
|
||||
"type": "module",
|
||||
"version": "2.0.2",
|
||||
"homepage": "https://web-check.xyz",
|
||||
"engines": {
|
||||
"node": ">=20"
|
||||
},
|
||||
"scripts": {
|
||||
"start": "node server",
|
||||
"start-pm": "pm2 start server.js -i max",
|
||||
@@ -15,59 +18,60 @@
|
||||
},
|
||||
"dependencies": {
|
||||
"@astrojs/check": "^0.5.10",
|
||||
"@astrojs/react": "^3.3.2",
|
||||
"@emotion/react": "^11.11.4",
|
||||
"@emotion/styled": "^11.11.5",
|
||||
"@fortawesome/fontawesome-svg-core": "^6.5.2",
|
||||
"@fortawesome/free-brands-svg-icons": "^6.5.2",
|
||||
"@fortawesome/free-regular-svg-icons": "^6.5.2",
|
||||
"@fortawesome/free-solid-svg-icons": "^6.5.2",
|
||||
"@fortawesome/svelte-fontawesome": "^0.2.2",
|
||||
"@types/react": "^18.3.1",
|
||||
"@types/react-dom": "^18.3.0",
|
||||
"astro": "^4.7.1",
|
||||
"axios": "^1.4.8",
|
||||
"cheerio": "^1.0.0-rc.12",
|
||||
"@astrojs/react": "^3.6.3",
|
||||
"@emotion/react": "^11.14.0",
|
||||
"@emotion/styled": "^11.14.1",
|
||||
"@fortawesome/fontawesome-svg-core": "^6.7.2",
|
||||
"@fortawesome/free-brands-svg-icons": "^6.7.2",
|
||||
"@fortawesome/free-regular-svg-icons": "^6.7.2",
|
||||
"@fortawesome/free-solid-svg-icons": "^6.7.2",
|
||||
"@fortawesome/svelte-fontawesome": "^0.2.4",
|
||||
"@types/react": "^18.3.28",
|
||||
"@types/react-dom": "^18.3.7",
|
||||
"astro": "^4.16.19",
|
||||
"axios": "^1.16.0",
|
||||
"cheerio": "^1.2.0",
|
||||
"chrome-aws-lambda": "^10.1.0",
|
||||
"chromium": "^3.0.3",
|
||||
"connect-history-api-fallback": "^2.0.0",
|
||||
"cors": "^2.8.5",
|
||||
"csv-parser": "^3.0.0",
|
||||
"dotenv": "^16.4.5",
|
||||
"express": "^4.19.2",
|
||||
"express-rate-limit": "^7.2.0",
|
||||
"framer-motion": "^11.2.6",
|
||||
"got": "^14.2.1",
|
||||
"pm2": "^5.3.1",
|
||||
"psl": "^1.9.0",
|
||||
"puppeteer": "^22.8.0",
|
||||
"puppeteer-core": "^22.8.0",
|
||||
"csv-parser": "^3.2.0",
|
||||
"dotenv": "^16.6.1",
|
||||
"express": "^4.21.2",
|
||||
"express-rate-limit": "^7.5.1",
|
||||
"framer-motion": "^11.18.2",
|
||||
"got": "^14.6.6",
|
||||
"pm2": "^5.4.3",
|
||||
"psl": "^1.15.0",
|
||||
"puppeteer": "^22.15.0",
|
||||
"puppeteer-core": "^22.15.0",
|
||||
"react": "^18.3.1",
|
||||
"react-dom": "^18.3.1",
|
||||
"react-masonry-css": "^1.0.16",
|
||||
"react-router-dom": "^6.23.0",
|
||||
"react-router-dom": "^6.30.3",
|
||||
"react-simple-maps": "^3.0.0",
|
||||
"react-toastify": "^10.0.5",
|
||||
"recharts": "^2.12.6",
|
||||
"svelte": "^4.2.17",
|
||||
"react-toastify": "^10.0.6",
|
||||
"recharts": "^2.15.4",
|
||||
"svelte": "^4.2.20",
|
||||
"traceroute": "^1.0.0",
|
||||
"typescript": "^5.4.5",
|
||||
"unzipper": "^0.11.5",
|
||||
"typescript": "^5.9.3",
|
||||
"unzipper": "^0.11.6",
|
||||
"url-parse": "^1.5.10",
|
||||
"wappalyzer": "^6.10.65",
|
||||
"wappalyzer": "^6.10.66",
|
||||
"xml2js": "^0.6.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@astrojs/cloudflare": "^10.2.5",
|
||||
"@astrojs/netlify": "^5.2.0",
|
||||
"@astrojs/node": "^8.2.5",
|
||||
"@astrojs/partytown": "^2.1.0",
|
||||
"@astrojs/sitemap": "^3.1.4",
|
||||
"@astrojs/svelte": "^5.4.0",
|
||||
"@astrojs/ts-plugin": "^1.6.1",
|
||||
"@astrojs/vercel": "^7.5.4",
|
||||
"@astrojs/cloudflare": "^10.4.2",
|
||||
"@astrojs/netlify": "^5.5.4",
|
||||
"@astrojs/node": "^8.3.4",
|
||||
"@astrojs/partytown": "^2.1.7",
|
||||
"@astrojs/sitemap": "~3.4.1",
|
||||
"@astrojs/svelte": "^5.7.3",
|
||||
"@astrojs/ts-plugin": "^1.10.7",
|
||||
"@astrojs/vercel": "^7.8.2",
|
||||
"concurrently": "^8.2.2",
|
||||
"nodemon": "^3.1.0",
|
||||
"sass": "^1.77.1"
|
||||
}
|
||||
"nodemon": "^3.1.14",
|
||||
"sass": "^1.99.0"
|
||||
},
|
||||
"packageManager": "yarn@1.22.22+sha512.a6b2f7906b721bba3d67d4aff083df04dad64c399707841b7acf00f6b133b7ac24255f2652fa22ae3534329dc6180534e98d17432037ff6fd140556e2bb3137e"
|
||||
}
|
||||
|
||||
12
server.js
12
server.js
@@ -13,6 +13,18 @@ dotenv.config();
|
||||
// Create the Express app
|
||||
const app = express();
|
||||
|
||||
// When the app sits behind a reverse proxy (e.g. Traefik, nginx), Express must be
// told to trust the X-Forwarded-* headers. The TRUST_PROXY env var controls this:
// an all-digit value becomes a number, 'true'/'false' become booleans, and any
// other non-empty value is passed to Express unchanged. Unset or empty leaves the
// Express default in place.
const trustProxy = process.env.TRUST_PROXY;
if (trustProxy) {
  let trustSetting = trustProxy;
  if (/^\d+$/.test(trustProxy)) {
    trustSetting = parseInt(trustProxy, 10);
  } else if (trustProxy === 'true') {
    trustSetting = true;
  } else if (trustProxy === 'false') {
    trustSetting = false;
  }
  app.set('trust proxy', trustSetting);
}
|
||||
|
||||
const __filename = new URL(import.meta.url).pathname;
|
||||
const __dirname = path.dirname(__filename);
|
||||
|
||||
|
||||
@@ -446,7 +446,7 @@ const docs: Doc[] = [
|
||||
{
|
||||
id: 'rank',
|
||||
title: 'Global Ranking',
|
||||
description: 'This check shows the global rank of the requested site. This is only accurate for websites which are in the top 100 million list. We\'re using data from the Tranco project (see below), which collates the top sites on the web from Umbrella, Majestic, Quantcast, the Chrome User Experience Report and Cloudflare Radar.',
|
||||
description: 'This check shows the global rank of the requested site. This is only accurate for websites which are in the top 1 million list. We\'re using data from the Tranco project (see below), which collates the top sites on the web from Umbrella, Majestic, Quantcast, the Chrome User Experience Report and Cloudflare Radar.',
|
||||
use: 'Knowing a websites overall global rank can be useful for understanding the scale of the site, and for comparing it to other sites. It can also be useful for understanding the relative popularity of a site, and for identifying potential trends.',
|
||||
resources: [
|
||||
{ title: 'Tranco List', link: 'https://tranco-list.eu/' },
|
||||
|
||||
@@ -8,14 +8,13 @@
|
||||
],
|
||||
"functions": {
|
||||
"api/*.js": {
|
||||
"maxDuration": 10
|
||||
"maxDuration": 20
|
||||
}
|
||||
},
|
||||
"env": {
|
||||
"PLATFORM": "vercel",
|
||||
"CI": "false",
|
||||
"CHROME_PATH": "/usr/bin/chromium",
|
||||
"NODE_VERSION": "21.x",
|
||||
"GOOGLE_CLOUD_API_KEY": "",
|
||||
"BUILT_WITH_API_KEY": "",
|
||||
"REACT_APP_SHODAN_API_KEY": "",
|
||||
|
||||
Reference in New Issue
Block a user