mirror of
https://github.com/svemagie/indiekit-endpoint-activitypub.git
synced 2026-04-02 15:44:58 +02:00
27 issues fixed from multi-dimensional code review (4 Critical, 6 High, 11 Medium, 6 Low): Security (Critical): - Escape HTML in OAuth authorization page to prevent XSS (C1) - Add CSRF protection to OAuth authorize flow (C2) - Replace bypassable regex sanitizer with sanitize-html library (C3) - Enforce OAuth scopes on all Mastodon API routes (C4) Security (Medium/Low): - Fix SSRF via DNS resolution before private IP check (M1) - Add rate limiting to API, auth, and app registration endpoints (M2) - Validate redirect_uri on POST /oauth/authorize (M4) - Fix custom emoji URL injection with scheme validation + escaping (M5) - Remove data: scheme from allowed image sources (L6) - Add access token expiry (1hr) and refresh token rotation (90d) (M3) - Hash client secrets before storage (L3) Architecture: - Extract batch-broadcast.js — shared delivery logic (H1a) - Extract init-indexes.js — MongoDB index creation (H1b) - Extract syndicator.js — syndication logic (H1c) - Create federation-actions.js facade for controllers (M6) - index.js reduced from 1810 to ~1169 lines (35%) Performance: - Cache moderation data with 30s TTL + write invalidation (H6) - Increase inbox queue throughput to 10 items/sec (H5) - Make account enrichment non-blocking with fire-and-forget (H4) - Remove ephemeral getReplies/getLikes/getShares from ingest (M11) - Fix LRU caches to use true LRU eviction (L1) - Fix N+1 backfill queries with batch $in lookup (L2) UI/UX: - Split 3441-line reader.css into 15 feature-scoped files (H2) - Extract inline Alpine.js interaction component (H3) - Reduce sidebar navigation from 7 to 3 items (M7) - Add ARIA live regions for dynamic content updates (M8) - Extract shared CW/non-CW content partial (M9) - Document form handling pattern convention (M10) - Add accessible labels to functional emoji icons (L4) - Convert profile editor to Alpine.js (L5) Audit: documentation-central/audits/2026-03-24-activitypub-code-review.md Plan: 
documentation-central/plans/2026-03-24-activitypub-audit-fixes.md
359 lines
11 KiB
JavaScript
359 lines
11 KiB
JavaScript
/**
|
|
* OpenGraph metadata fetching with concurrency limiting
|
|
* @module og-unfurl
|
|
*/
|
|
|
|
import { lookup } from "node:dns/promises";
|
|
import { isIP } from "node:net";
|
|
import { unfurl } from "unfurl.js";
|
|
import { extractObjectData } from "./timeline-store.js";
|
|
import { lookupWithSecurity } from "./lookup-helpers.js";
|
|
|
|
// Sent as the User-Agent header on every metadata request
const USER_AGENT = "Mozilla/5.0 (compatible; Indiekit/1.0; +https://getindiekit.com)";

// Per-URL fetch timeout in milliseconds (10 seconds)
const TIMEOUT_MS = 10 * 1000;

// Ceiling on simultaneous outbound fetches — lower than theme's 5 (inbox context)
const MAX_CONCURRENT = 3;

// Ceiling on link previews kept per post
const MAX_PREVIEWS = 3;
|
|
|
|
// Concurrency limiter — prevents overwhelming outbound network
// Number of tasks currently running
let activeRequests = 0;
// FIFO of pending tasks: { resolve, reject, fn }
const queue = [];

/**
 * Start the next queued task if a concurrency slot is free.
 * The task's outcome (fulfilment OR rejection) is forwarded to the promise
 * handed out by throttled(), and the slot is always released afterwards.
 * @returns {void}
 */
function runNext() {
  if (queue.length === 0 || activeRequests >= MAX_CONCURRENT) return;

  activeRequests++;
  const { resolve, reject, fn } = queue.shift();

  // Calling fn inside a Promise executor keeps the call synchronous while
  // turning a synchronous throw (or a non-promise return value) into a
  // settled promise, so the slot can never leak.
  new Promise((settle) => settle(fn()))
    .then(resolve, reject)
    .finally(() => {
      activeRequests--;
      runNext();
    });
}

/**
 * Run an async task under the module-wide concurrency limit.
 * @param {() => Promise<*>} fn - Task to run once a slot is available
 * @returns {Promise<*>} Settles with the task's result; rejects if the task
 *   throws or rejects (previously such a promise stayed pending forever)
 */
function throttled(fn) {
  return new Promise((resolve, reject) => {
    queue.push({ resolve, reject, fn });
    runNext();
  });
}
|
|
|
|
/**
 * Derive a display domain from a URL by dropping a leading "www.".
 * @param {string} url - URL to inspect
 * @returns {string} Hostname without the "www." prefix, or the input
 *   unchanged when it cannot be parsed as a URL
 */
function extractDomain(url) {
  let host;
  try {
    ({ hostname: host } = new URL(url));
  } catch {
    return url;
  }
  return host.startsWith("www.") ? host.slice("www.".length) : host;
}
|
|
|
|
/**
 * Check if an IP address is in a private/reserved range.
 *
 * IPv6 addresses are expanded to their eight 16-bit groups and matched by
 * prefix bits rather than by text, and addresses that embed an IPv4 address
 * (IPv4-mapped, IPv4-compatible, NAT64) are judged by the embedded IPv4
 * address so they cannot be used to sidestep the IPv4 rules.
 * @param {string} ip - IPv4 or IPv6 address
 * @returns {boolean} True if private/reserved; false for public addresses
 *   and for strings that are not IP addresses
 */
function isPrivateIP(ip) {
  const family = isIP(ip);
  if (family === 4) {
    const [a, b] = ip.split(".").map(Number);
    return isReservedIPv4(a, b);
  }
  if (family === 6) {
    return isReservedIPv6(parseIPv6Groups(ip));
  }
  return false;
}

/**
 * Classify an IPv4 address by its first two octets.
 * @param {number} a - First octet
 * @param {number} b - Second octet
 * @returns {boolean} True if the address is private/reserved
 */
function isReservedIPv4(a, b) {
  return (
    a === 0 || // 0.0.0.0/8
    a === 10 || // 10.0.0.0/8
    a === 127 || // 127.0.0.0/8 (loopback)
    (a === 100 && b >= 64 && b <= 127) || // 100.64.0.0/10 (carrier-grade NAT)
    (a === 169 && b === 254) || // 169.254.0.0/16 (link-local)
    (a === 172 && b >= 16 && b <= 31) || // 172.16.0.0/12
    (a === 192 && b === 168) || // 192.168.0.0/16
    (a === 198 && (b === 18 || b === 19)) || // 198.18.0.0/15 (benchmarking)
    a >= 224 // 224.0.0.0/4 multicast + 240.0.0.0/4 reserved/broadcast
  );
}

/**
 * Expand a valid IPv6 literal into its eight 16-bit groups.
 * @param {string} ip - IPv6 address already validated with isIP()
 * @returns {number[]} Eight integers in the range 0–0xffff
 */
function parseIPv6Groups(ip) {
  let addr = ip.toLowerCase();

  // Drop a zone identifier such as "%eth0"
  const zoneIndex = addr.indexOf("%");
  if (zoneIndex !== -1) addr = addr.slice(0, zoneIndex);

  // Rewrite a trailing dotted quad (e.g. ::ffff:127.0.0.1) as two hex groups
  const lastColon = addr.lastIndexOf(":");
  const tail = addr.slice(lastColon + 1);
  if (tail.includes(".")) {
    const [o1, o2, o3, o4] = tail.split(".").map(Number);
    const high = ((o1 << 8) | o2).toString(16);
    const low = ((o3 << 8) | o4).toString(16);
    addr = `${addr.slice(0, lastColon + 1)}${high}:${low}`;
  }

  const [head, rest] = addr.split("::");
  const headGroups = head ? head.split(":") : [];
  if (rest === undefined) {
    // No "::" shorthand — all eight groups are spelled out
    return headGroups.map((group) => Number.parseInt(group, 16));
  }
  const restGroups = rest ? rest.split(":") : [];
  const zeroFill = new Array(8 - headGroups.length - restGroups.length).fill("0");
  return [...headGroups, ...zeroFill, ...restGroups].map((group) => Number.parseInt(group, 16));
}

/**
 * Classify an expanded IPv6 address.
 * @param {number[]} groups - Eight 16-bit groups from parseIPv6Groups()
 * @returns {boolean} True if the address is private/reserved
 */
function isReservedIPv6(groups) {
  const [g0, g1, g2, g3, g4, g5, g6] = groups;

  if ((g0 & 0xfe00) === 0xfc00) return true; // fc00::/7 (unique local)
  if ((g0 & 0xffc0) === 0xfe80) return true; // fe80::/10 (link-local)
  if ((g0 & 0xff00) === 0xff00) return true; // ff00::/8 (multicast)

  const middleIsZero = g2 === 0 && g3 === 0 && g4 === 0;

  // ::/96 (includes "::" and "::1", which map to 0.0.0.x) and ::ffff:0:0/96
  if (g0 === 0 && g1 === 0 && middleIsZero && (g5 === 0 || g5 === 0xffff)) {
    return isReservedIPv4(g6 >> 8, g6 & 0xff);
  }

  // 64:ff9b::/96 (NAT64) — reaches the embedded IPv4 address
  if (g0 === 0x64 && g1 === 0xff9b && middleIsZero && g5 === 0) {
    return isReservedIPv4(g6 >> 8, g6 & 0xff);
  }

  return false;
}
|
|
|
|
/**
 * Check if a URL resolves to a private/reserved IP (SSRF protection).
 *
 * Hostnames are resolved through DNS and EVERY returned address is checked,
 * so a name that mixes public and private records is rejected.
 *
 * Note: this function only returns a verdict; it does not pin the address
 * it validated. A caller that later fetches by URL triggers a fresh DNS
 * resolution, so the check narrows DNS-rebinding attacks rather than fully
 * preventing them.
 * @param {string} url - URL to check
 * @returns {Promise<boolean>} True if URL targets a private network, uses a
 *   non-http(s) scheme, is invalid, or cannot be resolved
 */
async function isPrivateResolved(url) {
  try {
    const urlObj = new URL(url);

    // Block non-http(s) schemes
    if (urlObj.protocol !== "http:" && urlObj.protocol !== "https:") {
      return true;
    }

    const hostname = urlObj.hostname
      .toLowerCase()
      .replace(/^\[|\]$/g, "") // IPv6 literals arrive wrapped in brackets
      .replace(/\.$/, ""); // "localhost." is the same host as "localhost"

    if (!hostname) return true;

    // Block localhost and any name under the reserved .localhost TLD
    if (hostname === "localhost" || hostname.endsWith(".localhost")) return true;

    // If hostname is already an IP, check directly (no DNS needed)
    if (isIP(hostname)) return isPrivateIP(hostname);

    // DNS resolution — one private record is enough to reject the host
    const records = await lookup(hostname, { all: true });
    if (records.length === 0) return true;
    return records.some(({ address }) => isPrivateIP(address));
  } catch {
    return true; // DNS failure or invalid URL — treat as private
  }
}
|
|
|
|
/**
 * Extract links from HTML content
 *
 * Scans opening <a> tags and reads their href and class attributes (single
 * or double quoted, in any order). The href is entity-decoded so that
 * "&amp;" in a query string yields the real URL; anchors without a
 * non-empty href are ignored.
 * @param {string} html - Sanitized HTML content
 * @returns {Array<{url: string, classes: string}>} Links with their class attributes
 */
function extractLinks(html) {
  if (!html) return [];

  // Attribute names must start the attribute list or follow whitespace, so
  // "data-href" / "data-class" are not mistaken for href / class.
  const hrefPattern = /(?:^|\s)href\s*=\s*(?:"([^"]*)"|'([^']*)')/i;
  const classPattern = /(?:^|\s)class\s*=\s*(?:"([^"]*)"|'([^']*)')/i;

  const links = [];
  for (const [, attrs] of String(html).matchAll(/<a\s([^>]+)>/gi)) {
    const hrefMatch = attrs.match(hrefPattern);
    const rawHref = hrefMatch ? (hrefMatch[1] ?? hrefMatch[2]) : "";
    if (!rawHref) continue;

    const classMatch = attrs.match(classPattern);
    links.push({
      url: decodeAttributeEntities(rawHref),
      classes: classMatch ? (classMatch[1] ?? classMatch[2]) : "",
    });
  }

  return links;
}

/**
 * Decode the HTML character references that can appear in an attribute value.
 * Handles the five predefined named entities plus decimal/hex numeric forms
 * in a single pass (so "&amp;lt;" becomes "&lt;", not "<").
 * @param {string} value - Raw attribute value
 * @returns {string} Decoded value
 */
function decodeAttributeEntities(value) {
  const named = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'" };
  return value.replace(
    /&(?:#x([0-9a-f]+)|#(\d+)|(amp|lt|gt|quot|apos));/gi,
    (entity, hex, dec, name) => {
      if (name) return named[name.toLowerCase()];
      const codePoint = Number.parseInt(hex ?? dec, hex ? 16 : 10);
      // Leave out-of-range references untouched instead of throwing
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
    },
  );
}
|
|
|
|
// Path suffixes of media files, which carry no OpenGraph metadata
const MEDIA_EXTENSION_PATTERN = /\.(jpg|jpeg|png|gif|webp|mp4|webm|mov|mp3|wav|ogg)$/i;

// Common AP object path patterns (heuristic - not exhaustive)
const AP_OBJECT_PATTERNS = [
  /\/@[\w.-]+\/\d+/, // Mastodon /@user/12345
  /\/@[\w.-]+\/statuses\/[\w]+/, // GoToSocial /@user/statuses/id
  /\/users\/[\w.-]+\/statuses\/\d+/, // Mastodon/Pleroma /users/user/statuses/12345
  /\/objects\/[\w-]+/, // Pleroma/Akkoma /objects/uuid
  /\/notice\/[\w]+/, // Pleroma /notice/id
  /\/notes\/[\w]+/, // Misskey /notes/id
];

/**
 * Check if URL is likely an ActivityPub object or media file, or points at
 * a private/internal network.
 *
 * The pathname checks run first because they are synchronous; the SSRF
 * check (which may perform a DNS lookup) only runs for URLs that would
 * otherwise be fetched.
 * @param {string} url - URL to check
 * @returns {Promise<boolean>} True if URL should be skipped
 */
async function shouldSkipUrl(url) {
  try {
    const { pathname } = new URL(url);

    // Skip media extensions
    if (MEDIA_EXTENSION_PATTERN.test(pathname)) return true;

    // Skip likely ActivityPub objects
    if (AP_OBJECT_PATTERNS.some((pattern) => pattern.test(pathname))) return true;

    // SSRF protection — skip private/internal URLs
    return await isPrivateResolved(url);
  } catch {
    return true; // Invalid URL, skip
  }
}
|
|
|
|
/**
 * Fetch OpenGraph metadata for external links in HTML content
 *
 * Mention and hashtag links are dropped and the remaining URLs are
 * de-duplicated BEFORE the skip check, so each distinct URL triggers at most
 * one DNS-backed check. At most MAX_PREVIEWS URLs are fetched, each through
 * the shared concurrency limiter; URLs whose fetch fails are logged and
 * omitted from the result.
 * @param {string} html - Sanitized HTML content
 * @returns {Promise<Array<{url: string, title: string, description: string, image: string, favicon: string, domain: string, fetchedAt: string}>>} Link preview objects
 */
export async function fetchLinkPreviews(html) {
  if (!html) return [];

  // Skip mention links (class="mention") and hashtag links (class="hashtag"),
  // then dedupe while keeping first-seen order
  const candidateUrls = [
    ...new Set(
      extractLinks(html)
        .filter(({ classes }) => !classes.includes("mention") && !classes.includes("hashtag"))
        .map(({ url }) => url),
    ),
  ];
  if (candidateUrls.length === 0) return [];

  // Async because shouldSkipUrl may perform DNS resolution
  const skipFlags = await Promise.all(candidateUrls.map((url) => shouldSkipUrl(url)));

  const urlsToFetch = candidateUrls
    .filter((_, index) => !skipFlags[index])
    .slice(0, MAX_PREVIEWS); // Cap at max

  if (urlsToFetch.length === 0) return [];

  // Fetch metadata for each URL (throttled)
  const previews = await Promise.all(
    urlsToFetch.map(async (url) => {
      const metadata = await throttled(async () => {
        try {
          return await unfurl(url, {
            timeout: TIMEOUT_MS,
            headers: { "User-Agent": USER_AGENT },
          });
        } catch (error) {
          console.warn(`[og-unfurl] Failed to fetch ${url}: ${error.message}`);
          return null;
        }
      });

      return metadata ? buildLinkPreview(url, metadata) : null;
    }),
  );

  // Filter out failed fetches (null results)
  return previews.filter((preview) => preview !== null);
}

/**
 * Turn unfurl metadata into a link preview record.
 * Prefers OpenGraph values, then Twitter Card values, then generic page
 * metadata; the title falls back to the URL's domain.
 * @param {string} url - URL the metadata was fetched from
 * @param {object} metadata - Result object returned by unfurl()
 * @returns {{url: string, title: string, description: string, image: (string|null), favicon: (string|null), domain: string, fetchedAt: string}} Preview record
 */
function buildLinkPreview(url, metadata) {
  const og = metadata.open_graph || {};
  const tc = metadata.twitter_card || {};
  const domain = extractDomain(url);

  const description = og.description || tc.description || metadata.description || "";

  // Truncate long descriptions and mark the cut with an ellipsis
  const maxDesc = 160;
  const desc =
    description.length > maxDesc
      ? description.slice(0, maxDesc).trim() + "\u2026"
      : description;

  return {
    url,
    title: og.title || tc.title || metadata.title || domain,
    description: desc,
    image: og.images?.[0]?.url || tc.images?.[0]?.url || null,
    favicon: metadata.favicon || null,
    domain,
    fetchedAt: new Date().toISOString(),
  };
}
|
|
|
|
/**
 * Resolve link previews for a post and persist them on its timeline item.
 * Intended to be called without awaiting: any failure while fetching or
 * writing is logged and never propagated to the caller.
 * @param {object} collections - MongoDB collections
 * @param {string} uid - Timeline item UID
 * @param {string} html - Post content HTML
 * @returns {Promise<void>}
 */
export async function fetchAndStorePreviews(collections, uid, html) {
  try {
    const filter = { uid };
    const update = { $set: { linkPreviews: await fetchLinkPreviews(html) } };
    await collections.ap_timeline.updateOne(filter, update);
  } catch (error) {
    // Best-effort background work — report the failure and move on
    const reason = error.message;
    console.error(`[og-unfurl] Failed to store previews for ${uid}: ${reason}`);
  }
}
|
|
|
|
/**
 * Look up a quoted post and attach a summary of it to the quoting timeline item.
 * Intended to be called without awaiting: failures are logged, never thrown.
 *
 * When the quoted post's author has no photo but has a URL, the actor is
 * looked up separately to fill the photo in; a failure there is ignored.
 * The quoting post's stored HTML is also passed through
 * stripQuoteReferenceHtml and rewritten if that changes it.
 * @param {object} collections - MongoDB collections
 * @param {string} uid - Timeline item UID (the quoting post)
 * @param {string} quoteUrl - URL of the quoted post
 * @param {object} ctx - Fedify context (for lookupObject)
 * @param {object} documentLoader - Authenticated DocumentLoader
 * @returns {Promise<void>}
 */
export async function fetchAndStoreQuote(collections, uid, quoteUrl, ctx, documentLoader) {
  try {
    const quotedObject = await lookupWithSecurity(ctx, new URL(quoteUrl), { documentLoader });
    if (!quotedObject) return;

    const data = await extractObjectData(quotedObject, { documentLoader });
    const { author } = data;

    // Author photo missing — try to recover it from the actor document
    const shouldFetchActor = !author.photo && author.url;
    if (shouldFetchActor) {
      try {
        const actor = await lookupWithSecurity(ctx, new URL(author.url), { documentLoader });
        if (actor) {
          const { extractActorInfo } = await import("./timeline-store.js");
          const { photo } = await extractActorInfo(actor, { documentLoader });
          if (photo) author.photo = photo;
        }
      } catch {
        // Actor fetch failed — keep existing author data
      }
    }

    // Only the first photo of the quoted post is kept
    const firstPhoto = data.photo?.slice(0, 1) || [];
    const quote = {
      url: data.url || data.uid,
      uid: data.uid,
      author: data.author,
      content: data.content,
      published: data.published,
      name: data.name,
      photo: firstPhoto,
    };

    // Strip the "RE: <link>" paragraph from the parent post's content
    // Mastodon adds this as: <p>RE: <a href="QUOTE_URL">...</a></p>
    const $set = { quote };
    const parent = await collections.ap_timeline.findOne({ uid });
    const parentHtml = parent?.content?.html;
    if (parentHtml) {
      const withoutReference = stripQuoteReferenceHtml(parentHtml, quoteUrl);
      if (withoutReference !== parentHtml) {
        $set["content.html"] = withoutReference;
      }
    }

    await collections.ap_timeline.updateOne({ uid }, { $set });
  } catch (error) {
    console.error(`[og-unfurl] Failed to fetch quote for ${uid}: ${error.message}`);
  }
}
|
|
|
|
/**
 * Strip the "RE: <link>" paragraph that Mastodon adds for quoted posts.
 *
 * Removes the first <p> element (with or without attributes) whose entire
 * content is "RE:" followed by a single link pointing at the quote URL's
 * domain. Matching is by domain rather than full URL because Mastodon uses
 * both /users/X/statuses/Y and /@X/Y URL formats for the same post.
 * A paragraph that has any other content after the link is left alone, and
 * the match never extends past the link's own closing tag.
 * @param {string} html - Content HTML
 * @param {string} quoteUrl - URL of the quoted post
 * @returns {string} Cleaned, trimmed HTML; the input unchanged when html or
 *   quoteUrl is empty or quoteUrl is not a valid URL
 */
export function stripQuoteReferenceHtml(html, quoteUrl) {
  if (!html || !quoteUrl) return html;

  try {
    const { hostname } = new URL(quoteUrl);

    // Escape special regex chars in domain
    const domainEscaped = hostname.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

    const paragraphOpen = "<p(?:\\s[^>]*)?>";
    const anchorOpen = `<a\\s[^>]*href="[^"]*${domainEscaped}[^"]*"[^>]*>`;
    // Anchor body: anything (including newlines) up to the FIRST </a>, so the
    // match cannot swallow later links or paragraphs
    const anchorBody = "(?:(?!</a>)[\\s\\S])*";

    const re = new RegExp(
      `${paragraphOpen}\\s*RE:\\s*${anchorOpen}${anchorBody}</a>\\s*</p>`,
      "i",
    );

    return html.replace(re, "").trim();
  } catch {
    return html;
  }
}
|