Files
web-check/api/linked-pages.js
Alicia Sykes 1298b9431d ref: Reliability improvements and fixes
- Sitemap endpoint now recursively expands sitemap-index files
  - Fixes #165
- Strips :port from target URLs in get-ip, dns, dns-server, ports, mail-config
  - Fixes #203
- Configurable trust proxy (TRUST_PROXY env) so app works behind Traefik/nginx
  - Fixes #157
- Tranco rank now correctly says "top 1 million" (was "100 million")
  - Fixes #257
- Adds engines.node ">=20" so Vercel picks a supported runtime
  - Re #212
- Raises Vercel maxDuration from 10s to 60s, cutting most 504 timeouts
  - Re #251
  - Re #287
- Bumps axios 1.4.8 to 1.16, closing 4 high-severity SSRF/DoS CVEs
  - Re #289
- Fixes mail-config crash where dns module was awaited as if promise-based
- Adds reusable structured logging util for the API
- Bumps a whole bunch of deps, and resolves lots of open npm CVEs
2026-05-04 14:32:51 +01:00

50 lines
2.0 KiB
JavaScript

import axios from 'axios';
import * as cheerio from 'cheerio';
import urlLib from 'url';
import middleware from './_common/middleware.js';
/**
 * Orders the keys of an occurrence-count map from most to least frequent.
 * Array.prototype.sort is stable, so ties keep their first-seen order.
 *
 * @param {Map<string, number>} countsByUrl - absolute URL -> number of occurrences
 * @returns {string[]} de-duplicated URLs, most frequent first
 */
const rankByOccurrence = (countsByUrl) => [...countsByUrl.entries()]
  .sort(([, a], [, b]) => b - a)
  .map(([link]) => link);

/**
 * Fetches a page and lists the pages it links to, split into internal links
 * (same host as the scanned URL) and external links (any other host).
 *
 * Only `<a href>` elements present in the static HTML are considered, and only
 * links that resolve to an http(s) URL are counted (mailto:, tel:,
 * javascript: etc. are ignored).
 *
 * @param {string} url - absolute URL of the page to scan
 * @returns {Promise<{internal: string[], external: string[]}
 *   | {statusCode: number, body: {skipped: string}}>}
 *   the two link lists, each de-duplicated and sorted by number of occurrences
 *   (descending); or a 400 "skipped" result when the page contains no links.
 * @throws rejects with whatever `axios.get` rejects with when the fetch fails,
 *   or with a TypeError when `url` is not a parseable absolute URL.
 */
const linkedPagesHandler = async (url) => {
  const response = await axios.get(url);
  const $ = cheerio.load(response.data);
  const pageUrl = new urlLib.URL(url);

  const internalLinksMap = new Map();
  const externalLinksMap = new Map();

  // Walk every anchor on the page and count how often each target occurs
  $('a[href]').each((i, link) => {
    const href = $(link).attr('href');

    let target;
    try {
      // WHATWG resolution handles relative, root-relative and
      // protocol-relative hrefs against the scanned page's URL
      target = new urlLib.URL(href, pageUrl);
    } catch {
      return; // Malformed href: nothing meaningful to report, keep iterating
    }

    // Skip non-web schemes such as mailto:, tel: and javascript:
    if (target.protocol !== 'http:' && target.protocol !== 'https:') {
      return;
    }

    // Compare hosts rather than string prefixes, so that look-alike domains
    // (example.com.evil.test) are not treated as internal, and same-site links
    // outside the scanned path are not dropped
    const linksMap = target.host === pageUrl.host ? internalLinksMap : externalLinksMap;
    linksMap.set(target.href, (linksMap.get(target.href) ?? 0) + 1);
  });

  // Sort by most occurrences, remove duplicates, and convert to array
  const internalLinks = rankByOccurrence(internalLinksMap);
  const externalLinks = rankByOccurrence(externalLinksMap);

  // If there were no links, then mark as skipped and show reasons
  if (internalLinks.length === 0 && externalLinks.length === 0) {
    return {
      statusCode: 400,
      body: {
        skipped: 'No internal or external links found. '
          + 'This may be due to the website being dynamically rendered, using a client-side framework (like React), and without SSR enabled. '
          + 'That would mean that the static HTML returned from the HTTP request doesn\'t contain any meaningful content for Web-Check to analyze. '
          + 'You can rectify this by using a headless browser to render the page instead.',
      },
    };
  }

  return { internal: internalLinks, external: externalLinks };
};
// Wrap the handler with the shared middleware imported from ./_common/middleware.js
// (presumably request parsing / response formatting — confirm in that module),
// and expose the wrapped function as both a named and the default export.
export const handler = middleware(linkedPagesHandler);
export default handler;