mirror of
https://github.com/svemagie/indiekit-endpoint-activitypub.git
synced 2026-04-02 15:44:58 +02:00
- FEP-8fcf: add syncCollection to Undo(Announce) sendActivity - FEP-fe34: centralized lookupWithSecurity() helper with crossOrigin: "ignore" on all 23 lookupObject call sites - Custom emoji: replaceCustomEmoji() renders :shortcode: as inline <img> in content and actor display names - Manual follow approval: profile toggle, ap_pending_follows collection, approve/reject controllers with federation, pending tab on followers page, follow_request notification type - Coverage audit updated to v2.12.x (overall ~70% → ~82%) Confab-Link: http://localhost:8080/sessions/1f1e729b-0087-499e-a991-f36f46211fe4
340 lines
11 KiB
JavaScript
340 lines
11 KiB
JavaScript
/**
 * OpenGraph link-preview fetching (concurrency-limited, with SSRF filtering)
 * and quoted-post enrichment for timeline items.
 * @module og-unfurl
 */
|
|
|
|
import { unfurl } from "unfurl.js";
|
|
import { extractObjectData } from "./timeline-store.js";
|
|
import { lookupWithSecurity } from "./lookup-helpers.js";
|
|
|
|
// User-Agent header sent with every outbound metadata request.
const USER_AGENT = "Mozilla/5.0 (compatible; Indiekit/1.0; +https://getindiekit.com)";

// Per-URL fetch timeout in milliseconds (10 seconds).
const TIMEOUT_MS = 10_000;

// Simultaneous outbound fetches allowed; lower than theme's 5 (inbox context).
const MAX_CONCURRENT = 3;

// Upper bound on link previews stored per post.
const MAX_PREVIEWS = 3;
|
|
|
|
// Concurrency limiter — prevents overwhelming outbound network.
// `activeRequests` counts tasks currently running; `queue` holds tasks
// waiting for a free slot, in FIFO order.
let activeRequests = 0;
const queue = [];

/**
 * Start the next queued task if a slot is free.
 *
 * The task's outcome (fulfilment OR rejection) is forwarded to the promise
 * handed out by `throttled`, and the slot is always released afterwards so a
 * failing task can never wedge the limiter.
 * @returns {void}
 */
function runNext() {
  if (queue.length === 0 || activeRequests >= MAX_CONCURRENT) return;

  activeRequests++;
  const { resolve, reject, fn } = queue.shift();

  // Calling fn inside a Promise executor keeps the call synchronous while
  // turning a synchronous throw into a rejection instead of leaking the slot.
  const pending = new Promise((settle) => settle(fn()));

  pending
    .then(resolve, reject)
    .finally(() => {
      activeRequests--;
      runNext();
    });
}

/**
 * Run `fn` once a concurrency slot is available.
 * @param {() => Promise<*>|*} fn - Task to run; may return a value or a promise
 * @returns {Promise<*>} Settles with the task's result, or rejects with its error
 */
function throttled(fn) {
  return new Promise((resolve, reject) => {
    queue.push({ resolve, reject, fn });
    runNext();
  });
}
|
|
|
|
/**
 * Derive a display domain from a URL by taking its hostname and dropping a
 * leading "www.". Input that cannot be parsed as a URL is returned unchanged.
 * @param {string} url - URL to inspect
 * @returns {string} Hostname without leading "www.", or the input itself
 */
function extractDomain(url) {
  let host;
  try {
    ({ hostname: host } = new URL(url));
  } catch {
    return url;
  }
  return host.replace(/^www\./, "");
}
|
|
|
|
/**
 * Test the first two octets of an IPv4 address against non-public ranges.
 * @param {number} a - First octet
 * @param {number} b - Second octet
 * @returns {boolean} True if the address is in a private/reserved range
 */
function isPrivateIpv4(a, b) {
  return (
    a === 0 || // 0.0.0.0/8
    a === 10 || // 10.0.0.0/8
    a === 127 || // 127.0.0.0/8 (loopback)
    (a === 100 && b >= 64 && b <= 127) || // 100.64.0.0/10 (carrier-grade NAT)
    (a === 169 && b === 254) || // 169.254.0.0/16 (link-local / cloud metadata)
    (a === 172 && b >= 16 && b <= 31) || // 172.16.0.0/12
    (a === 192 && b === 168) // 192.168.0.0/16
  );
}

/**
 * Expand a bracket-less IPv6 literal (as serialised by the URL parser) into
 * its eight 16-bit groups.
 * @param {string} address - IPv6 text, e.g. "::ffff:7f00:1"
 * @returns {number[]|null} Eight numeric groups, or null if the text is malformed
 */
function expandIpv6(address) {
  const [head, tail] = address.split("::");
  const headParts = head ? head.split(":") : [];
  const tailParts = tail ? tail.split(":") : [];
  const missing = 8 - headParts.length - tailParts.length;

  // Without "::" all eight groups must be present
  if (missing < 0 || (tail === undefined && missing !== 0)) return null;

  const groups = [...headParts, ...new Array(missing).fill("0"), ...tailParts].map(
    (part) => Number.parseInt(part, 16),
  );
  return groups.some((group) => Number.isNaN(group)) ? null : groups;
}

/**
 * Decide whether an expanded IPv6 address is loopback, unspecified, unique
 * local, link-local, or an IPv4-mapped address pointing at a private IPv4 range.
 * @param {number[]} groups - Eight 16-bit groups
 * @returns {boolean} True if the address must not be fetched
 */
function isPrivateIpv6(groups) {
  const [first] = groups;
  const leadingZero = (count) => groups.slice(0, count).every((group) => group === 0);

  if (leadingZero(7) && groups[7] <= 1) return true; // :: and ::1
  if ((first & 0xfe00) === 0xfc00) return true; // fc00::/7 (unique local)
  if ((first & 0xffc0) === 0xfe80) return true; // fe80::/10 (link-local)

  // ::ffff:a.b.c.d — the URL parser serialises the IPv4 part as two hex groups
  if (leadingZero(5) && groups[5] === 0xffff) {
    return isPrivateIpv4(groups[6] >> 8, groups[6] & 0xff);
  }
  return false;
}

/**
 * Check if a URL points to a private/reserved IP or localhost (SSRF protection)
 *
 * Only the literal host in the URL is inspected; no DNS resolution is done.
 * Anything that is not http(s), cannot be parsed, or has an unreadable IPv6
 * literal is treated as private (fail closed).
 * @param {string} url - URL to check
 * @returns {boolean} True if URL targets a private network
 */
function isPrivateUrl(url) {
  try {
    const urlObj = new URL(url);

    // Block non-http(s) schemes
    if (urlObj.protocol !== "http:" && urlObj.protocol !== "https:") {
      return true;
    }

    // A trailing dot ("localhost.") names the same host, so ignore it
    const hostname = urlObj.hostname.toLowerCase().replace(/\.$/, "");

    // Block localhost and any name under the .localhost TLD
    if (hostname === "localhost" || hostname.endsWith(".localhost")) {
      return true;
    }

    // IPv6 literals keep their brackets in URL.hostname
    if (hostname.startsWith("[")) {
      const groups = expandIpv6(hostname.slice(1, -1));
      return groups === null || isPrivateIpv6(groups);
    }

    // Block private IPv4 ranges (the URL parser has already normalised
    // decimal/hex/octal forms to dotted-quad)
    const ipv4Match = hostname.match(/^(\d+)\.(\d+)\.(\d+)\.(\d+)$/);
    if (ipv4Match) {
      return isPrivateIpv4(Number(ipv4Match[1]), Number(ipv4Match[2]));
    }

    return false;
  } catch {
    return true; // Invalid URL, treat as private
  }
}
|
|
|
|
// Attribute matchers: require start-of-string or whitespace before the name so
// that e.g. `data-href` is not mistaken for `href`; accept either quote style.
const HREF_ATTR_RE = /(?:^|\s)href\s*=\s*(?:"([^"]+)"|'([^']+)')/i;
const CLASS_ATTR_RE = /(?:^|\s)class\s*=\s*(?:"([^"]+)"|'([^']+)')/i;

// Character references that commonly appear inside serialised URL attributes
const URL_ENTITY_RE = /&(amp|lt|gt|quot|apos|#38|#39|#x26|#x27);/gi;
const URL_ENTITIES = {
  amp: "&",
  "#38": "&",
  "#x26": "&",
  lt: "<",
  gt: ">",
  quot: '"',
  apos: "'",
  "#39": "'",
  "#x27": "'",
};

/**
 * Extract links from HTML content
 *
 * Scans opening `<a …>` tags and reads their `href` and `class` attributes,
 * wherever they appear in the tag. HTML character references in the href
 * (notably `&amp;`) are decoded in a single pass so the returned URL is the
 * one a browser would navigate to. Anchors without an href are ignored.
 * @param {string} html - Sanitized HTML content
 * @returns {Array<{url: string, classes: string}>} Links with their class attributes
 */
function extractLinks(html) {
  if (!html) return [];

  const links = [];

  // Match complete <a> tags and extract href + class from anywhere in attributes
  for (const [, attrs] of html.matchAll(/<a\s([^>]+)>/gi)) {
    const hrefMatch = attrs.match(HREF_ATTR_RE);
    if (!hrefMatch) continue;

    const classMatch = attrs.match(CLASS_ATTR_RE);
    const rawHref = hrefMatch[1] ?? hrefMatch[2];
    const url = rawHref.replace(
      URL_ENTITY_RE,
      (_, name) => URL_ENTITIES[name.toLowerCase()],
    );

    links.push({
      url,
      classes: classMatch ? (classMatch[1] ?? classMatch[2]) : "",
    });
  }

  return links;
}
|
|
|
|
/**
 * Decide whether a URL should be excluded from link-preview fetching.
 *
 * A URL is skipped when it cannot be parsed, when `isPrivateUrl` flags it
 * (SSRF protection), when its path ends in a known media extension, or when
 * its path matches one of several common ActivityPub object URL shapes
 * (a heuristic, not an exhaustive list).
 * @param {string} url - URL to check
 * @returns {boolean} True if URL should be skipped
 */
function shouldSkipUrl(url) {
  let pathname;
  try {
    ({ pathname } = new URL(url));

    // SSRF protection: never fetch private/internal targets
    if (isPrivateUrl(url)) return true;
  } catch {
    // Unparseable URL
    return true;
  }

  // Direct media files carry no OpenGraph metadata worth fetching
  if (/\.(jpg|jpeg|png|gif|webp|mp4|webm|mov|mp3|wav|ogg)$/i.test(pathname)) {
    return true;
  }

  // Path shapes used by fediverse software for individual posts
  const fediversePostPaths = [
    /\/@[\w.-]+\/\d+/, // Mastodon /@user/12345
    /\/@[\w.-]+\/statuses\/[\w]+/, // GoToSocial /@user/statuses/id
    /\/users\/[\w.-]+\/statuses\/\d+/, // Mastodon/Pleroma /users/user/statuses/12345
    /\/objects\/[\w-]+/, // Pleroma/Akkoma /objects/uuid
    /\/notice\/[\w]+/, // Pleroma /notice/id
    /\/notes\/[\w]+/, // Misskey /notes/id
  ];

  for (const shape of fediversePostPaths) {
    if (shape.test(pathname)) return true;
  }
  return false;
}
|
|
|
|
/**
 * Shorten a description to at most `limit` UTF-16 code units plus an ellipsis.
 * If the cut lands in the middle of a surrogate pair, the dangling high
 * surrogate is dropped so the result never contains a broken character.
 * @param {string} text - Description text
 * @param {number} [limit=160] - Maximum length before the ellipsis
 * @returns {string} Original text, or truncated text ending in "…"
 */
function truncateDescription(text, limit = 160) {
  if (text.length <= limit) return text;

  let head = text.slice(0, limit);
  const lastUnit = head.charCodeAt(head.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    head = head.slice(0, -1);
  }
  return `${head.trim()}\u2026`;
}

/**
 * Fetch OpenGraph metadata for external links in HTML content
 *
 * Links marked as mentions or hashtags, and links rejected by `shouldSkipUrl`,
 * are ignored. The remaining URLs are de-duplicated (first occurrence wins),
 * capped at MAX_PREVIEWS, and fetched through the concurrency limiter. A URL
 * whose fetch fails is logged and omitted from the result.
 * @param {string} html - Sanitized HTML content
 * @returns {Promise<Array<{url: string, title: string, description: string, image: string|null, favicon: string|null, domain: string, fetchedAt: string}>>} Link preview objects
 */
export async function fetchLinkPreviews(html) {
  if (!html) return [];

  const candidateUrls = extractLinks(html)
    .filter(
      ({ url, classes }) =>
        // Skip mention links, hashtag links, then AP object URLs / media / private hosts
        !classes.includes("mention") &&
        !classes.includes("hashtag") &&
        !shouldSkipUrl(url),
    )
    .map(({ url }) => url);

  // Set preserves insertion order, so this dedupes while keeping first occurrences
  const urlsToFetch = [...new Set(candidateUrls)].slice(0, MAX_PREVIEWS);

  if (urlsToFetch.length === 0) return [];

  // Fetch metadata for each URL (throttled)
  const previews = await Promise.all(
    urlsToFetch.map(async (url) => {
      const metadata = await throttled(async () => {
        try {
          return await unfurl(url, {
            timeout: TIMEOUT_MS,
            headers: { "User-Agent": USER_AGENT },
          });
        } catch (error) {
          console.warn(`[og-unfurl] Failed to fetch ${url}: ${error.message}`);
          return null;
        }
      });

      if (!metadata) return null;

      const og = metadata.open_graph || {};
      const tc = metadata.twitter_card || {};
      const domain = extractDomain(url);

      // Preference order: OpenGraph, then Twitter card, then plain page metadata
      const title = og.title || tc.title || metadata.title || domain;
      const description = og.description || tc.description || metadata.description || "";

      return {
        url,
        title,
        description: truncateDescription(description),
        image: og.images?.[0]?.url || tc.images?.[0]?.url || null,
        favicon: metadata.favicon || null,
        domain,
        fetchedAt: new Date().toISOString(),
      };
    }),
  );

  // Filter out failed fetches (null results)
  return previews.filter((preview) => preview !== null);
}
|
|
|
|
/**
 * Resolve link previews for a post and persist them on its timeline item.
 *
 * Intended to be called without awaiting (fire-and-forget): any failure while
 * fetching or writing is logged via console.error and never rethrown.
 * @param {object} collections - MongoDB collections (uses `ap_timeline`)
 * @param {string} uid - Timeline item UID
 * @param {string} html - Post content HTML
 * @returns {Promise<void>}
 */
export async function fetchAndStorePreviews(collections, uid, html) {
  try {
    const linkPreviews = await fetchLinkPreviews(html);
    const selector = { uid };
    const change = { $set: { linkPreviews } };
    await collections.ap_timeline.updateOne(selector, change);
  } catch (error) {
    // Swallow after logging: callers do not await this function
    const reason = error.message;
    console.error(`[og-unfurl] Failed to store previews for ${uid}: ${reason}`);
  }
}
|
|
|
|
/**
 * Look up a quoted post and attach a snapshot of it to the quoting timeline item.
 *
 * Steps: resolve the quote URL, extract its data, optionally back-fill the
 * author's photo from the actor document, then write the `quote` field. If the
 * quoting item's stored HTML contains the "RE: <link>" fallback paragraph for
 * this quote, the cleaned HTML is written in the same update.
 *
 * Fire-and-forget — caller does NOT await. Errors are caught and logged.
 * @param {object} collections - MongoDB collections (uses `ap_timeline`)
 * @param {string} uid - Timeline item UID (the quoting post)
 * @param {string} quoteUrl - URL of the quoted post
 * @param {object} ctx - Fedify context (passed to the lookup helper)
 * @param {object} documentLoader - Authenticated DocumentLoader
 * @returns {Promise<void>}
 */
export async function fetchAndStoreQuote(collections, uid, quoteUrl, ctx, documentLoader) {
  try {
    const quotedObject = await lookupWithSecurity(ctx, new URL(quoteUrl), { documentLoader });
    if (!quotedObject) return;

    const quoteData = await extractObjectData(quotedObject, { documentLoader });
    const { author } = quoteData;

    // Author photo missing: try the actor document as a second source
    if (!author.photo && author.url) {
      try {
        const actor = await lookupWithSecurity(ctx, new URL(author.url), { documentLoader });
        if (actor) {
          const { extractActorInfo } = await import("./timeline-store.js");
          const { photo: actorPhoto } = await extractActorInfo(actor, { documentLoader });
          if (actorPhoto) {
            author.photo = actorPhoto;
          }
        }
      } catch {
        // Actor fetch failed — keep existing author data
      }
    }

    const fields = {
      quote: {
        url: quoteData.url || quoteData.uid,
        uid: quoteData.uid,
        author: quoteData.author,
        content: quoteData.content,
        published: quoteData.published,
        name: quoteData.name,
        // Keep at most the first photo of the quoted post
        photo: quoteData.photo?.slice(0, 1) || [],
      },
    };

    // Mastodon adds a fallback paragraph: <p>RE: <a href="QUOTE_URL">...</a></p>
    // Remove it from the quoting post's HTML, but only write when it changed.
    const parentItem = await collections.ap_timeline.findOne({ uid });
    const parentHtml = parentItem?.content?.html;
    if (parentHtml) {
      const withoutReference = stripQuoteReferenceHtml(parentHtml, quoteUrl);
      if (withoutReference !== parentHtml) {
        fields["content.html"] = withoutReference;
      }
    }

    await collections.ap_timeline.updateOne({ uid }, { $set: fields });
  } catch (error) {
    console.error(`[og-unfurl] Failed to fetch quote for ${uid}: ${error.message}`);
  }
}
|
|
|
|
/**
 * Strip the "RE: <link>" paragraph that Mastodon adds for quoted posts.
 *
 * Removes the first <p> element (with or without attributes, e.g.
 * `<p class="quote-inline">`) whose content is "RE:" followed by a link to the
 * quoted post's domain. Only the domain is compared, because Mastodon uses
 * both /users/X/statuses/Y and /@X/Y URL formats for the same post.
 * The result is trimmed of surrounding whitespace.
 * @param {string} html - Content HTML
 * @param {string} quoteUrl - URL of the quoted post
 * @returns {string} Cleaned HTML; the input unchanged if either argument is
 *   empty or `quoteUrl` cannot be parsed
 */
export function stripQuoteReferenceHtml(html, quoteUrl) {
  if (!html || !quoteUrl) return html;

  try {
    const quoteDomain = new URL(quoteUrl).hostname;

    // Escape special regex chars in domain before embedding it in a pattern
    const domainEscaped = quoteDomain.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

    // <p ...>RE: <a href="...DOMAIN...">...</a></p> (with optional whitespace).
    // `<p(?:\s[^>]*)?>` accepts attributes on the paragraph but not other
    // tags that merely start with "p" (e.g. <pre>).
    const re = new RegExp(
      `<p(?:\\s[^>]*)?>\\s*RE:\\s*<a\\s[^>]*href="[^"]*${domainEscaped}[^"]*"[^>]*>.*?</a>\\s*</p>`,
      "i",
    );

    return html.replace(re, "").trim();
  } catch {
    return html;
  }
}
|