Files
indiekit-endpoint-activitypub/lib/og-unfurl.js
Ricardo 12454749ad fix: comprehensive security, performance, and architecture audit fixes
27 issues fixed from multi-dimensional code review (4 Critical, 6 High, 11 Medium, 6 Low):

Security (Critical):
- Escape HTML in OAuth authorization page to prevent XSS (C1)
- Add CSRF protection to OAuth authorize flow (C2)
- Replace bypassable regex sanitizer with sanitize-html library (C3)
- Enforce OAuth scopes on all Mastodon API routes (C4)

Security (Medium/Low):
- Fix SSRF via DNS resolution before private IP check (M1)
- Add rate limiting to API, auth, and app registration endpoints (M2)
- Validate redirect_uri on POST /oauth/authorize (M4)
- Fix custom emoji URL injection with scheme validation + escaping (M5)
- Remove data: scheme from allowed image sources (L6)
- Add access token expiry (1hr) and refresh token rotation (90d) (M3)
- Hash client secrets before storage (L3)

Architecture:
- Extract batch-broadcast.js — shared delivery logic (H1a)
- Extract init-indexes.js — MongoDB index creation (H1b)
- Extract syndicator.js — syndication logic (H1c)
- Create federation-actions.js facade for controllers (M6)
- index.js reduced from 1810 to ~1169 lines (35%)

Performance:
- Cache moderation data with 30s TTL + write invalidation (H6)
- Increase inbox queue throughput to 10 items/sec (H5)
- Make account enrichment non-blocking with fire-and-forget (H4)
- Remove ephemeral getReplies/getLikes/getShares from ingest (M11)
- Fix LRU caches to use true LRU eviction (L1)
- Fix N+1 backfill queries with batch $in lookup (L2)

UI/UX:
- Split 3441-line reader.css into 15 feature-scoped files (H2)
- Extract inline Alpine.js interaction component (H3)
- Reduce sidebar navigation from 7 to 3 items (M7)
- Add ARIA live regions for dynamic content updates (M8)
- Extract shared CW/non-CW content partial (M9)
- Document form handling pattern convention (M10)
- Add accessible labels to functional emoji icons (L4)
- Convert profile editor to Alpine.js (L5)

Audit: documentation-central/audits/2026-03-24-activitypub-code-review.md
Plan: documentation-central/plans/2026-03-24-activitypub-audit-fixes.md
2026-03-25 07:41:20 +01:00

359 lines
11 KiB
JavaScript

/**
* OpenGraph metadata fetching with concurrency limiting
* @module og-unfurl
*/
import { lookup } from "node:dns/promises";
import { isIP } from "node:net";
import { unfurl } from "unfurl.js";
import { extractObjectData } from "./timeline-store.js";
import { lookupWithSecurity } from "./lookup-helpers.js";
// User-Agent header value sent with every metadata request
const USER_AGENT = "Mozilla/5.0 (compatible; Indiekit/1.0; +https://getindiekit.com)";

// Per-URL fetch timeout, in milliseconds (10 seconds)
const TIMEOUT_MS = 10_000;

// Simultaneous outbound fetches — lower than theme's 5 (inbox context)
const MAX_CONCURRENT = 3;

// Upper bound on link previews generated for a single post
const MAX_PREVIEWS = 3;
// Concurrency limiter — prevents overwhelming outbound network.
// `activeRequests` counts tasks currently running; `queue` holds tasks that
// are waiting for a free slot, in arrival order.
let activeRequests = 0;
const queue = [];

/**
 * Start the next queued task if a concurrency slot is free.
 * Called whenever a task is enqueued and whenever a running task settles.
 */
function runNext() {
  if (queue.length === 0 || activeRequests >= MAX_CONCURRENT) return;
  activeRequests++;
  const { resolve, reject, fn } = queue.shift();
  // Invoking fn inside a Promise executor still starts it synchronously, but
  // converts a synchronous throw or a non-promise return value into a settled
  // promise, so the slot below is always released.
  new Promise((settle) => settle(fn()))
    // Forward both outcomes to the caller; a failure must not leave the
    // caller's promise pending or surface as an unhandled rejection.
    .then(resolve, reject)
    .finally(() => {
      activeRequests--;
      runNext();
    });
}

/**
 * Run a task through the shared concurrency limiter.
 * @param {Function} fn - Task to run once a slot is available; may return a promise
 * @returns {Promise<*>} Settles with the task's result, or rejects with its error
 */
function throttled(fn) {
  return new Promise((resolve, reject) => {
    queue.push({ resolve, reject, fn });
    runNext();
  });
}
/**
 * Derive a display domain from a URL, dropping a leading "www." label.
 * @param {string} url - URL to inspect
 * @returns {string} Hostname without leading "www.", or the input unchanged
 *   when it cannot be parsed as a URL
 */
function extractDomain(url) {
  let host;
  try {
    ({ hostname: host } = new URL(url));
  } catch {
    // Not a parseable URL — hand the raw value back
    return url;
  }
  return host.startsWith("www.") ? host.slice("www.".length) : host;
}
/**
 * Decide whether the first two octets of an IPv4 address fall in a
 * private/reserved range.
 * @param {number[]} octets - The four octets of the address
 * @returns {boolean} True if private/reserved
 */
function isReservedIPv4([a, b]) {
  return (
    a === 0 || // 0.0.0.0/8 ("this network")
    a === 10 || // 10.0.0.0/8
    a === 127 || // 127.0.0.0/8 (loopback)
    (a === 100 && b >= 64 && b <= 127) || // 100.64.0.0/10 (carrier-grade NAT)
    (a === 169 && b === 254) || // 169.254.0.0/16 (link-local)
    (a === 172 && b >= 16 && b <= 31) || // 172.16.0.0/12
    (a === 192 && b === 168) || // 192.168.0.0/16
    (a === 198 && (b === 18 || b === 19)) || // 198.18.0.0/15 (benchmarking)
    a >= 224 // 224.0.0.0/4 multicast, 240.0.0.0/4 reserved, broadcast
  );
}

/**
 * Expand a textual IPv6 address (already validated by isIP) into its eight
 * 16-bit groups, handling "::" compression, a dotted IPv4 tail and a zone id.
 * @param {string} ip - IPv6 address
 * @returns {number[]} Eight numeric groups
 */
function expandIPv6(ip) {
  let text = ip.split("%")[0].toLowerCase(); // drop zone id (fe80::1%eth0)
  const lastColon = text.lastIndexOf(":");
  const tail = text.slice(lastColon + 1);
  if (tail.includes(".")) {
    // Rewrite a dotted IPv4 tail (::ffff:127.0.0.1) as two hex groups
    const [o1, o2, o3, o4] = tail.split(".").map(Number);
    const high = ((o1 << 8) | o2).toString(16);
    const low = ((o3 << 8) | o4).toString(16);
    text = `${text.slice(0, lastColon + 1)}${high}:${low}`;
  }
  const [head, rest] = text.split("::");
  const headGroups = head ? head.split(":") : [];
  if (rest === undefined) {
    return headGroups.map((group) => Number.parseInt(group, 16));
  }
  const restGroups = rest ? rest.split(":") : [];
  const missing = Math.max(0, 8 - headGroups.length - restGroups.length);
  return [...headGroups, ...Array(missing).fill("0"), ...restGroups].map(
    (group) => Number.parseInt(group, 16),
  );
}

/**
 * Check if an IP address is in a private/reserved range.
 *
 * IPv6 addresses are compared numerically rather than by string prefix, so
 * every spelling of an address (compressed, expanded, dotted tail) is treated
 * the same. IPv4 addresses embedded in IPv6 (IPv4-mapped and NAT64) are
 * judged by the embedded IPv4 address.
 * @param {string} ip - IPv4 or IPv6 address
 * @returns {boolean} True if private/reserved; false for public addresses and
 *   for strings that are not IP addresses
 */
function isPrivateIP(ip) {
  const family = isIP(ip);
  if (family === 4) return isReservedIPv4(ip.split(".").map(Number));
  if (family !== 6) return false;

  const groups = expandIPv6(ip);
  const embeddedIPv4 = [
    groups[6] >> 8,
    groups[6] & 0xff,
    groups[7] >> 8,
    groups[7] & 0xff,
  ];

  // IPv4-mapped (::ffff:a.b.c.d) — connects to the embedded IPv4 host
  const isMapped =
    groups.slice(0, 5).every((group) => group === 0) && groups[5] === 0xffff;
  if (isMapped) return isReservedIPv4(embeddedIPv4);

  // NAT64 well-known prefix (64:ff9b::/96) — also carries an IPv4 address
  const isNat64 =
    groups[0] === 0x64 &&
    groups[1] === 0xff9b &&
    groups.slice(2, 6).every((group) => group === 0);
  if (isNat64) return isReservedIPv4(embeddedIPv4);

  // Global unicast is allocated from 2000::/3 only. Anything outside it is
  // loopback (::1), unspecified (::), ULA (fc00::/7), link-local (fe80::/10),
  // multicast (ff00::/8) or unassigned, none of which is a public host.
  return (groups[0] & 0xe000) !== 0x2000;
}
/**
 * Check if a URL resolves to a private/reserved IP (SSRF protection).
 *
 * Non-http(s) schemes, "localhost", unparseable URLs and DNS failures are all
 * reported as private. Hostnames are resolved and EVERY returned address is
 * checked, so a name that maps to both a public and an internal address is
 * rejected regardless of record order.
 *
 * NOTE(review): this validates the name at check time only. A caller that
 * later fetches by hostname presumably resolves it again, so this narrows but
 * may not fully close the DNS-rebinding window — confirm against the fetch path.
 * @param {string} url - URL to check
 * @returns {Promise<boolean>} True if URL targets a private network
 */
async function isPrivateResolved(url) {
  try {
    const urlObj = new URL(url);
    // Block non-http(s) schemes
    if (urlObj.protocol !== "http:" && urlObj.protocol !== "https:") {
      return true;
    }
    // URL keeps the brackets around IPv6 literals; strip them for isIP()
    const hostname = urlObj.hostname.toLowerCase().replace(/^\[|\]$/g, "");
    // Block obvious localhost variants
    if (hostname === "localhost") return true;
    // If hostname is already an IP, check directly (no DNS needed)
    if (isIP(hostname)) return isPrivateIP(hostname);
    // DNS resolution — request all records, not just the first one
    const addresses = await lookup(hostname, { all: true });
    if (addresses.length === 0) return true; // nothing to connect to
    return addresses.some(({ address }) => isPrivateIP(address));
  } catch {
    return true; // DNS failure or invalid URL — treat as private
  }
}
// Character references decoded in attribute values
const LINK_ATTRIBUTE_ENTITIES = {
  amp: "&",
  lt: "<",
  gt: ">",
  quot: '"',
  "#39": "'",
  "#x27": "'",
};

/**
 * Read one attribute from the attribute text of a tag.
 * @param {string} attrs - Text between the tag name and the closing ">"
 * @param {string} name - Attribute name ("href" or "class")
 * @returns {string} Decoded attribute value, or "" when absent or empty
 */
function readLinkAttribute(attrs, name) {
  // Require start-of-text or whitespace before the name so that "href" does
  // not match inside "data-href"; accept either quote style.
  const pattern = new RegExp(
    `(?:^|\\s)${name}\\s*=\\s*(?:"([^"]*)"|'([^']*)')`,
    "i",
  );
  const found = pattern.exec(attrs);
  if (!found) return "";
  const raw = found[1] ?? found[2];
  // Single pass, so "&amp;lt;" becomes "&lt;" and is not decoded twice
  return raw.replace(
    /&(amp|lt|gt|quot|#39|#x27);/gi,
    (_, key) => LINK_ATTRIBUTE_ENTITIES[key.toLowerCase()],
  );
}

/**
 * Extract links from HTML content
 *
 * Attribute values are entity-decoded, so a serialized href such as
 * "?a=1&amp;b=2" is returned as the real URL "?a=1&b=2".
 * @param {string} html - Sanitized HTML content
 * @returns {Array<{url: string, classes: string}>} Links with their class attributes
 */
function extractLinks(html) {
  if (!html) return [];
  const links = [];
  // Match opening <a> tags; href and class may appear in any order
  for (const [, attrs] of html.matchAll(/<a\s([^>]+)>/gi)) {
    const url = readLinkAttribute(attrs, "href");
    if (url) {
      links.push({ url, classes: readLinkAttribute(attrs, "class") });
    }
  }
  return links;
}
/**
 * Check if a URL should be excluded from link-preview fetching: media files,
 * likely ActivityPub objects, private/internal targets and invalid URLs.
 *
 * The path-only checks run first because they are synchronous; the SSRF check
 * can involve a DNS lookup and is only reached for URLs that survive them.
 * @param {string} url - URL to check
 * @returns {Promise<boolean>} True if URL should be skipped
 */
async function shouldSkipUrl(url) {
  try {
    const { pathname } = new URL(url);

    // Skip media extensions
    const mediaExtensions = /\.(jpg|jpeg|png|gif|webp|mp4|webm|mov|mp3|wav|ogg)$/i;
    if (mediaExtensions.test(pathname)) {
      return true;
    }

    // Skip common AP object patterns (heuristic - not exhaustive)
    const apPatterns = [
      /\/@[\w.-]+\/\d+/, // Mastodon /@user/12345
      /\/@[\w.-]+\/statuses\/[\w]+/, // GoToSocial /@user/statuses/id
      /\/users\/[\w.-]+\/statuses\/\d+/, // Mastodon/Pleroma /users/user/statuses/12345
      /\/objects\/[\w-]+/, // Pleroma/Akkoma /objects/uuid
      /\/notice\/[\w]+/, // Pleroma /notice/id
      /\/notes\/[\w]+/, // Misskey /notes/id
    ];
    if (apPatterns.some((pattern) => pattern.test(pathname))) {
      return true;
    }

    // SSRF protection — skip private/internal URLs
    if (await isPrivateResolved(url)) {
      return true;
    }
    return false;
  } catch {
    return true; // Invalid URL (or failed check), skip
  }
}
/**
 * Shorten a description to at most `maxLength` characters, appending an
 * ellipsis when text was cut. Counts Unicode code points so an emoji or other
 * astral character is never split in half.
 * @param {string} text - Description text
 * @param {number} [maxLength=160] - Maximum number of characters to keep
 * @returns {string} Original or truncated text
 */
function truncateDescription(text, maxLength = 160) {
  let kept = 0;
  let end = 0;
  for (const char of text) {
    if (kept === maxLength) {
      return `${text.slice(0, end).trim()}\u2026`;
    }
    kept++;
    end += char.length;
  }
  return text;
}

/**
 * Fetch metadata for one URL (through the concurrency limiter) and shape it
 * into a link preview object.
 * @param {string} url - URL to unfurl
 * @returns {Promise<object|null>} Preview object, or null when the fetch failed
 */
async function buildLinkPreview(url) {
  const metadata = await throttled(async () => {
    try {
      return await unfurl(url, {
        timeout: TIMEOUT_MS,
        headers: { "User-Agent": USER_AGENT },
      });
    } catch (error) {
      console.warn(`[og-unfurl] Failed to fetch ${url}: ${error.message}`);
      return null;
    }
  });
  if (!metadata) return null;

  // Preference order: OpenGraph, then Twitter card, then plain page metadata
  const og = metadata.open_graph || {};
  const tc = metadata.twitter_card || {};
  const domain = extractDomain(url);
  return {
    url,
    title: og.title || tc.title || metadata.title || domain,
    description: truncateDescription(
      og.description || tc.description || metadata.description || "",
    ),
    image: og.images?.[0]?.url || tc.images?.[0]?.url || null,
    favicon: metadata.favicon || null,
    domain,
    fetchedAt: new Date().toISOString(),
  };
}

/**
 * Fetch OpenGraph metadata for external links in HTML content
 * @param {string} html - Sanitized HTML content
 * @returns {Promise<Array<{url: string, title: string, description: string, image: string, favicon: string, domain: string, fetchedAt: string}>>} Link preview objects
 */
export async function fetchLinkPreviews(html) {
  if (!html) return [];

  // Drop mention/hashtag links first, then dedupe, so each distinct URL goes
  // through the DNS-backed shouldSkipUrl check only once.
  const candidateUrls = [
    ...new Set(
      extractLinks(html)
        .filter(
          ({ classes }) =>
            !classes.includes("mention") && !classes.includes("hashtag"),
        )
        .map(({ url }) => url),
    ),
  ];

  // Skip AP object URLs, media files and private targets
  const skipFlags = await Promise.all(
    candidateUrls.map((url) => shouldSkipUrl(url)),
  );
  const urlsToFetch = candidateUrls
    .filter((_, index) => !skipFlags[index])
    .slice(0, MAX_PREVIEWS); // Cap at max
  if (urlsToFetch.length === 0) return [];

  // Fetch metadata for each URL (throttled)
  const previews = await Promise.all(
    urlsToFetch.map((url) => buildLinkPreview(url)),
  );
  // Filter out failed fetches (null results)
  return previews.filter((preview) => preview !== null);
}
/**
 * Generate link previews for a post and persist them on its timeline item.
 * Intended to be fired without awaiting: any failure while fetching or
 * writing is logged and never propagates to the caller.
 * @param {object} collections - MongoDB collections
 * @param {string} uid - Timeline item UID
 * @param {string} html - Post content HTML
 * @returns {Promise<void>}
 */
export async function fetchAndStorePreviews(collections, uid, html) {
  try {
    const change = { $set: { linkPreviews: await fetchLinkPreviews(html) } };
    const target = { uid };
    await collections.ap_timeline.updateOne(target, change);
  } catch (error) {
    // Best-effort background work — report and move on
    const reason = error.message;
    console.error(`[og-unfurl] Failed to store previews for ${uid}: ${reason}`);
  }
}
/**
 * Resolve a quoted post and attach its data to the quoting timeline item.
 * Intended to be fired without awaiting: every failure is caught and logged.
 *
 * Steps: look up the quoted object, extract its data, try once to fill in a
 * missing author photo from the author's actor, then store the quote and, when
 * the parent's HTML changes after removing its quote-reference paragraph, the
 * cleaned HTML as well.
 * @param {object} collections - MongoDB collections
 * @param {string} uid - Timeline item UID (the quoting post)
 * @param {string} quoteUrl - URL of the quoted post
 * @param {object} ctx - Fedify context (for lookupObject)
 * @param {object} documentLoader - Authenticated DocumentLoader
 * @returns {Promise<void>}
 */
export async function fetchAndStoreQuote(collections, uid, quoteUrl, ctx, documentLoader) {
  // Each call receives its own options object
  const loaderOptions = () => ({ documentLoader });
  try {
    const quotedObject = await lookupWithSecurity(ctx, new URL(quoteUrl), loaderOptions());
    if (!quotedObject) return;

    const data = await extractObjectData(quotedObject, loaderOptions());
    const { author } = data;

    // Author photo missing but profile URL known: ask the actor directly
    if (!author.photo && author.url) {
      try {
        const actor = await lookupWithSecurity(ctx, new URL(author.url), loaderOptions());
        if (actor) {
          const { extractActorInfo } = await import("./timeline-store.js");
          const { photo } = await extractActorInfo(actor, loaderOptions());
          if (photo) {
            author.photo = photo;
          }
        }
      } catch {
        // Actor fetch failed — keep existing author data
      }
    }

    const fields = {
      quote: {
        url: data.url || data.uid,
        uid: data.uid,
        author: data.author,
        content: data.content,
        published: data.published,
        name: data.name,
        photo: data.photo?.slice(0, 1) || [], // at most the first photo
      },
    };

    // Strip the "RE: <link>" paragraph from the parent post's content
    // Mastodon adds this as: <p>RE: <a href="QUOTE_URL">...</a></p>
    const parent = await collections.ap_timeline.findOne({ uid });
    const parentHtml = parent?.content?.html;
    if (parentHtml) {
      const withoutReference = stripQuoteReferenceHtml(parentHtml, quoteUrl);
      if (withoutReference !== parentHtml) {
        fields["content.html"] = withoutReference;
      }
    }

    await collections.ap_timeline.updateOne({ uid }, { $set: fields });
  } catch (error) {
    console.error(`[og-unfurl] Failed to fetch quote for ${uid}: ${error.message}`);
  }
}
/**
 * Strip the "RE: <link>" paragraph that Mastodon adds for quoted posts.
 *
 * Removes the first <p> element (with or without attributes) whose entire
 * content is "RE:" followed by a single link pointing at the quoted post's
 * domain. Only the domain is compared, not the path, because the same post
 * can be linked as /users/X/statuses/Y or /@X/Y. A paragraph that carries
 * anything after the link is left untouched so user-written text is never
 * deleted. The result is trimmed.
 * @param {string} html - Content HTML
 * @param {string} quoteUrl - URL of the quoted post
 * @returns {string} Cleaned HTML (input returned as-is when either argument
 *   is empty or quoteUrl is not a valid URL)
 */
export function stripQuoteReferenceHtml(html, quoteUrl) {
  if (!html || !quoteUrl) return html;
  try {
    const quoteDomain = new URL(quoteUrl).hostname;
    // Escape special regex chars in domain
    const domainEscaped = quoteDomain.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const re = new RegExp(
      [
        "<p(?:\\s[^>]*)?>", // opening <p>, attributes allowed
        "\\s*RE:\\s*",
        `<a\\s[^>]*href="[^"]*${domainEscaped}[^"]*"[^>]*>`,
        // Link content: anything (including newlines and nested tags) up to
        // the FIRST </a>, so the match cannot run on into later links.
        "(?:(?!</a>)[\\s\\S])*",
        "</a>\\s*</p>",
      ].join(""),
      "i",
    );
    return html.replace(re, "").trim();
  } catch {
    // quoteUrl could not be parsed — nothing to match against
    return html;
  }
}