Files
indiekit-endpoint-activitypub/lib/mastodon/backfill-timeline.js
Ricardo 12454749ad fix: comprehensive security, performance, and architecture audit fixes
27 issues fixed from multi-dimensional code review (4 Critical, 6 High, 11 Medium, 6 Low):

Security (Critical):
- Escape HTML in OAuth authorization page to prevent XSS (C1)
- Add CSRF protection to OAuth authorize flow (C2)
- Replace bypassable regex sanitizer with sanitize-html library (C3)
- Enforce OAuth scopes on all Mastodon API routes (C4)

Security (Medium/Low):
- Fix SSRF via DNS resolution before private IP check (M1)
- Add rate limiting to API, auth, and app registration endpoints (M2)
- Validate redirect_uri on POST /oauth/authorize (M4)
- Fix custom emoji URL injection with scheme validation + escaping (M5)
- Remove data: scheme from allowed image sources (L6)
- Add access token expiry (1hr) and refresh token rotation (90d) (M3)
- Hash client secrets before storage (L3)

Architecture:
- Extract batch-broadcast.js — shared delivery logic (H1a)
- Extract init-indexes.js — MongoDB index creation (H1b)
- Extract syndicator.js — syndication logic (H1c)
- Create federation-actions.js facade for controllers (M6)
- index.js reduced from 1810 to ~1169 lines (35%)

Performance:
- Cache moderation data with 30s TTL + write invalidation (H6)
- Increase inbox queue throughput to 10 items/sec (H5)
- Make account enrichment non-blocking with fire-and-forget (H4)
- Remove ephemeral getReplies/getLikes/getShares from ingest (M11)
- Fix LRU caches to use true LRU eviction (L1)
- Fix N+1 backfill queries with batch $in lookup (L2)

UI/UX:
- Split 3441-line reader.css into 15 feature-scoped files (H2)
- Extract inline Alpine.js interaction component (H3)
- Reduce sidebar navigation from 7 to 3 items (M7)
- Add ARIA live regions for dynamic content updates (M8)
- Extract shared CW/non-CW content partial (M9)
- Document form handling pattern convention (M10)
- Add accessible labels to functional emoji icons (L4)
- Convert profile editor to Alpine.js (L5)

Audit: documentation-central/audits/2026-03-24-activitypub-code-review.md
Plan: documentation-central/plans/2026-03-24-activitypub-audit-fixes.md
2026-03-25 07:41:20 +01:00

321 lines
9.3 KiB
JavaScript

/**
* Backfill ap_timeline from the posts collection.
*
* Runs on startup (idempotent — uses upsert by uid).
* Converts Micropub JF2 posts into ap_timeline format so they
* appear in Mastodon Client API timelines and profile views.
*/
/**
 * Backfill ap_timeline with published posts from the posts collection.
 *
 * Every post that is not a draft, is not deleted, and has a URL is converted
 * into a timeline item and written with an upsert keyed on `uid`. Because the
 * write uses `$setOnInsert`, items that already exist are never modified, so
 * the function is safe to run repeatedly.
 *
 * @param {object} collections - MongoDB collections. `posts` and `ap_timeline`
 *   are required (the function is a no-op without them); `ap_profile` is
 *   optional and only used to fill in author information.
 * @returns {Promise<{ total: number, inserted: number, skipped: number }>}
 *   `total` is the number of candidate posts, `inserted` the number of new
 *   timeline items, `skipped` everything else (already present, missing URL,
 *   or failed to write).
 */
export async function backfillTimeline(collections) {
  const { posts, ap_timeline, ap_profile } = collections;
  if (!posts || !ap_timeline) {
    return { total: 0, inserted: 0, skipped: 0 };
  }

  // Get local profile for author info
  const profile = await ap_profile?.findOne({});
  const siteUrl = profile?.url?.replace(/\/$/, "") || "";
  const author = {
    name: profile?.name || "",
    url: profile?.url || "",
    photo: profile?.icon || "",
    handle: "",
  };

  // Fetch all published posts
  const allPosts = await posts
    .find({
      "properties.post-status": { $ne: "draft" },
      "properties.deleted": { $exists: false },
      "properties.url": { $exists: true },
    })
    .toArray();

  let inserted = 0;
  let skipped = 0;

  // Batch-fetch existing UIDs to avoid N+1 per-post queries
  const allUids = allPosts.map((p) => p.properties?.url).filter(Boolean);
  const existingDocs = await ap_timeline
    .find({ uid: { $in: allUids } })
    .project({ uid: 1 })
    .toArray();
  const existingUids = new Set(existingDocs.map((d) => d.uid));

  for (const post of allPosts) {
    const props = post.properties;
    if (!props?.url) {
      skipped++;
      continue;
    }

    const uid = props.url;

    // Check if already in timeline (fast path to avoid unnecessary upserts)
    if (existingUids.has(uid)) {
      skipped++;
      continue;
    }

    // Build content — interaction types (bookmark, like, repost) may not have
    // body content, so synthesize it from the interaction target URL
    const content = buildContent(props);
    const type = mapPostType(props["post-type"]);

    // Extract categories + inline hashtags from content
    const categories = normalizeArray(props.category);
    const inlineHashtags = extractHashtags(`${content.text} ${content.html || ""}`);
    const mergedCategories = mergeCategories(categories, inlineHashtags);

    // Resolve the timestamp once so `published` and `createdAt` can never
    // disagree when the "now" fallback is used.
    const timestamp = props.published || props.date || new Date().toISOString();

    const timelineItem = {
      uid,
      url: uid,
      type,
      content: rewriteHashtagLinks(content, siteUrl),
      author,
      published: timestamp,
      createdAt: timestamp,
      visibility: "public",
      sensitive: false,
      category: mergedCategories,
      photo: normalizeMediaArray(props.photo, siteUrl),
      video: normalizeMediaArray(props.video, siteUrl),
      audio: normalizeMediaArray(props.audio, siteUrl),
      readBy: [],
    };

    // Optional fields
    if (props.name) timelineItem.name = props.name;
    if (props.summary) timelineItem.summary = props.summary;
    if (props["in-reply-to"]) {
      timelineItem.inReplyTo = Array.isArray(props["in-reply-to"])
        ? props["in-reply-to"][0]
        : props["in-reply-to"];
    }

    try {
      const result = await ap_timeline.updateOne(
        { uid },
        { $setOnInsert: timelineItem },
        { upsert: true },
      );
      if (result.upsertedCount > 0) {
        inserted++;
      } else {
        skipped++;
      }
    } catch (error) {
      // Best effort: one failing write must not abort the whole backfill,
      // but the failure is reported instead of being dropped silently.
      console.warn(
        `backfillTimeline: failed to upsert ${uid}: ${error?.message ?? error}`,
      );
      skipped++;
    }
  }

  return { total: allPosts.length, inserted, skipped };
}
// ─── Content Building ─────────────────────────────────────────────────────────
/**
 * Produce `{ text, html }` content for a post from its JF2 properties.
 *
 * Existing body content always wins. When there is none, a short line is
 * synthesized for bookmarks, likes and reposts (in that priority order) that
 * links to the interaction target; a post with only a title falls back to the
 * title. If nothing applies, the (empty) normalized content is returned.
 */
function buildContent(props) {
  const existing = normalizeContent(props.content);
  const hasBody = Boolean(existing.text || existing.html);
  if (hasBody) return existing;

  const {
    name: title,
    "bookmark-of": bookmarked,
    "like-of": liked,
    "repost-of": reposted,
  } = props;

  // All interaction types share the same "<Verb>: <link>" shape.
  const synthesize = (verb, target, label) => ({
    text: `${verb}: ${label}`,
    html: `<p>${verb}: <a href="${escapeHtml(target)}">${escapeHtml(label)}</a></p>`,
  });

  if (bookmarked) return synthesize("Bookmarked", bookmarked, title || bookmarked);
  // Likes always show the target URL, even when the post has a name
  if (liked) return synthesize("Liked", liked, liked);
  if (reposted) return synthesize("Reposted", reposted, title || reposted);

  // Article with title but no body
  if (title) {
    return { text: title, html: `<p>${escapeHtml(title)}</p>` };
  }

  return existing;
}
/**
 * Normalize content from JF2 properties to { text, html } format.
 *
 * Accepts either a plain-text string or an object carrying `html` and/or
 * `text`/`value`. Author-supplied `html` is passed through untouched; HTML
 * that has to be derived from plain text is entity-escaped first so that
 * characters such as `<` and `&` are shown literally instead of being
 * interpreted as markup.
 *
 * @param {*} content - JF2 `content` property
 * @returns {{ text: string, html: string }} Empty strings when there is no usable content
 */
function normalizeContent(content) {
  if (!content) return { text: "", html: "" };

  if (typeof content === "string") {
    return { text: content, html: `<p>${escapeHtml(content)}</p>` };
  }

  if (typeof content === "object") {
    const text = content.text || content.value || "";
    // Only the plain-text fallback is escaped; real HTML is kept as-is.
    return { text, html: content.html || escapeHtml(String(text)) };
  }

  return { text: "", html: "" };
}
// ─── Hashtag Handling ─────────────────────────────────────────────────────────
/**
 * Extract hashtags from text content.
 *
 * A hashtag is a `#` at the start of the text or directly after whitespace,
 * followed by a letter or underscore and then any run of letters, combining
 * marks, digits or underscores. Letters are matched by Unicode category, so
 * tags such as `#café` are captured whole rather than being cut off at the
 * first non-ASCII character.
 *
 * @param {string} text - Text to scan
 * @returns {string[]} Unique lowercase tag names without the # prefix, in order of first appearance
 */
function extractHashtags(text) {
  if (!text) return [];

  // A fresh literal per call: a shared /g regex would carry lastIndex state.
  const hashtagPattern = /(?:^|\s)#([\p{L}_][\p{L}\p{M}\p{N}_]*)/gu;

  const tags = new Set();
  for (const [, tag] of text.matchAll(hashtagPattern)) {
    tags.add(tag.toLowerCase());
  }
  return [...tags];
}
/**
 * Combine explicit categories with inline hashtags.
 *
 * Categories are kept as given and come first. A hashtag is appended only if
 * it does not equal the lowercased form of any category and has not already
 * been appended.
 */
function mergeCategories(categories, hashtags) {
  const known = new Set(categories.map((name) => name.toLowerCase()));

  const additions = hashtags.filter((tag) => {
    if (known.has(tag)) return false;
    known.add(tag);
    return true;
  });

  return [...categories, ...additions];
}
/**
 * Point category links in the HTML at `<siteUrl>/tags/<tag>` and mark them as
 * hashtags (`class="hashtag" rel="tag"`).
 *
 * Site-relative `/categories/<tag>/` links are always rewritten; absolute
 * `<siteUrl>/categories/<tag>/` links are rewritten only when a site URL is
 * known. Content without HTML is returned unchanged.
 */
function rewriteHashtagLinks(content, siteUrl) {
  if (!content.html) return content;

  const toTagLink = (_match, tag) =>
    `href="${siteUrl}/tags/${tag}" class="hashtag" rel="tag"`;

  // Site-relative links first, then absolute links on the same site
  const patterns = [/href="\/categories\/([^/"]+)\/?"/g];
  if (siteUrl) {
    patterns.push(
      new RegExp(`href="${escapeRegex(siteUrl)}/categories/([^/"]+)/?"`, "g"),
    );
  }

  const html = patterns.reduce(
    (markup, pattern) => markup.replace(pattern, toTagLink),
    content.html,
  );

  return { ...content, html };
}
// ─── Post Type Mapping ────────────────────────────────────────────────────────
/**
 * Translate a Micropub post-type into the timeline item type.
 *
 * Only two post types get a dedicated timeline type: "article" stays
 * "article" and "repost" becomes "boost". Everything else (photo, video,
 * audio, reply, like, bookmark, unknown or missing) is a "note".
 */
function mapPostType(postType) {
  if (postType === "article") return "article";
  if (postType === "repost") return "boost";
  return "note";
}
// ─── Normalization Helpers ────────────────────────────────────────────────────
/**
 * Coerce a value into an array of strings.
 *
 * Falsy input yields an empty array, an array has each element stringified,
 * and any other value becomes a one-element array.
 */
function normalizeArray(value) {
  if (!value) return [];
  const entries = Array.isArray(value) ? value : [value];
  return entries.map((entry) => String(entry));
}
/**
 * Normalize media values — resolves relative URLs to absolute.
 *
 * Strings are resolved directly; objects with a `url` are shallow-copied with
 * the resolved URL. Entries that are neither (including `null`, objects
 * without a `url`, and entries whose URL resolves to an empty value) are
 * dropped.
 *
 * @param {*} value - String, object with url, or array thereof
 * @param {string} siteUrl - Base site URL for resolving relative paths
 * @returns {Array<string|object>} Normalized media entries (empty when there is no value)
 */
function normalizeMediaArray(value, siteUrl) {
  if (!value) return [];

  const items = Array.isArray(value) ? value : [value];

  return items
    .map((item) => {
      if (typeof item === "string") return resolveUrl(item, siteUrl);
      // `typeof null` is "object", so guard the property access: a stray
      // null entry must be dropped, not crash the whole normalization.
      if (typeof item === "object" && item?.url) {
        return { ...item, url: resolveUrl(item.url, siteUrl) };
      }
      return null;
    })
    .filter(Boolean);
}
/**
 * Resolve a URL — if relative, prepend the site URL.
 *
 * - Empty or non-string values are returned unchanged.
 * - URLs that already carry a scheme (`https:`, `HTTP:`, `data:`, …) are
 *   returned unchanged.
 * - Protocol-relative URLs (`//host/path`) take their scheme from the site
 *   URL; they are returned unchanged when the site URL cannot serve as a base.
 * - Everything else is treated as a path and prefixed with the site URL.
 *
 * @param {*} url - URL or path to resolve
 * @param {string} siteUrl - Base site URL without trailing slash
 * @returns {*} Absolute URL string, or the input unchanged when it cannot be resolved
 */
function resolveUrl(url, siteUrl) {
  if (!url || typeof url !== "string") return url;

  // Anything starting with "<scheme>:" is already absolute. Scheme names are
  // case-insensitive and not limited to http(s).
  if (/^[a-z][a-z\d+.-]*:/i.test(url)) return url;

  // "//host/path" names another host; prefixing it with the site URL would
  // produce a broken path on the local site.
  if (url.startsWith("//")) {
    try {
      return new URL(url, siteUrl).href;
    } catch {
      // No usable base (e.g. empty site URL): leave the value as it was.
      return url;
    }
  }

  if (url.startsWith("/")) return `${siteUrl}${url}`;
  return `${siteUrl}/${url}`;
}
/**
 * Escape HTML entities.
 *
 * Replaces `&`, `<`, `>`, `"` and `'` so the result is safe to place in
 * element text or inside a quoted attribute value. Non-string input is
 * converted with `String()` instead of throwing; `null` and `undefined`
 * produce an empty string.
 *
 * @param {*} str - Value to escape
 * @returns {string} Escaped text
 */
function escapeHtml(str) {
  if (str == null) return "";

  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };

  // Single pass, so entities that were just inserted are never re-escaped.
  return String(str).replace(/[&<>"']/g, (character) => entities[character]);
}
/**
 * Backslash-escape every regular-expression metacharacter in a string so the
 * result can be embedded in a `RegExp` source and match the input literally.
 */
function escapeRegex(str) {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return str.replace(metaCharacters, (character) => `\\${character}`);
}