Files
indiekit-endpoint-activitypub/lib/timeline-store.js
svemagie a259c79a31 fix(mastodon-api): favourite/like 404 for items with BSON Date or timezone-offset published
Three-layer fix for findTimelineItemById cursor mismatches:

1. encodeCursor returns "" (falsy) for invalid dates — serializeStatus
   now correctly falls back to item._id.toString() instead of using "0"
   as an opaque ID that can never be looked up.

2. findTimelineItemById adds two new fallbacks after the existing string
   lookups fail:
   - BSON Date lookup: Micropub pipeline (postData.create) may store
     published as a JavaScript Date → MongoDB BSON Date; string
     comparison never matches.
   - ±999 ms ISO range query: some AP servers send published with a
     timezone offset (e.g. +01:00). String(Temporal.Instant) preserves
     the original offset; decodeCursor normalizes to UTC, so the stored
     string and the lookup string differ.

3. timeline-store.js extractObjectData now normalizes published via
   new Date(String(...)).toISOString() before storing, ensuring all
   future items are stored in UTC ISO format and the exact-match lookup
   succeeds without needing the range fallback.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-23 08:30:14 +01:00

408 lines
13 KiB
JavaScript

/**
* Timeline item extraction helpers
* @module timeline-store
*/
import { Article, Application, Emoji, Hashtag, Mention, Question, Service } from "@fedify/fedify/vocab";
import sanitizeHtml from "sanitize-html";
/**
* Sanitize HTML content for safe display
* @param {string} html - Raw HTML content
* @returns {string} Sanitized HTML
*/
export function sanitizeContent(html) {
if (!html) return "";
return sanitizeHtml(html, {
allowedTags: [
"p", "br", "a", "strong", "em", "ul", "ol", "li",
"blockquote", "code", "pre", "h1", "h2", "h3", "h4", "h5", "h6",
"span", "div", "img"
],
allowedAttributes: {
a: ["href", "rel", "class"],
img: ["src", "alt", "class"],
span: ["class"],
div: ["class"]
},
allowedSchemes: ["http", "https", "mailto"],
allowedSchemesByTag: {
img: ["http", "https", "data"]
}
});
}
/**
* Replace custom emoji :shortcode: placeholders with inline <img> tags.
* Applied AFTER sanitization — the <img> tags are controlled output from
* trusted emoji data, not user-supplied HTML.
*
* @param {string} html - Content HTML (already sanitized)
* @param {Array<{shortcode: string, url: string}>} emojis - Custom emoji data
* @returns {string} HTML with shortcodes replaced by <img> tags
*/
export function replaceCustomEmoji(html, emojis) {
if (!emojis?.length || !html) return html;
let result = html;
for (const { shortcode, url } of emojis) {
const escaped = shortcode.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
const pattern = new RegExp(`:${escaped}:`, "g");
result = result.replace(
pattern,
`<img class="ap-custom-emoji" src="${url}" alt=":${shortcode}:" title=":${shortcode}:" draggable="false">`,
);
}
return result;
}
/**
* Extract actor information from Fedify Person/Application/Service object
* @param {object} actor - Fedify actor object
* @param {object} [options] - Options
* @param {object} [options.documentLoader] - Authenticated DocumentLoader for Secure Mode servers
* @returns {object} { name, url, photo, handle }
*/
export async function extractActorInfo(actor, options = {}) {
if (!actor) {
return {
name: "Unknown",
url: "",
photo: "",
handle: ""
};
}
const rawName = actor.name?.toString() || actor.preferredUsername?.toString() || "Unknown";
// Strip all HTML from actor names to prevent stored XSS
const name = sanitizeHtml(rawName, { allowedTags: [], allowedAttributes: {} });
const url = actor.id?.href || "";
// Extract photo URL from icon (Fedify uses async getters)
const loaderOpts = options.documentLoader ? { documentLoader: options.documentLoader } : {};
let photo = "";
try {
if (typeof actor.getIcon === "function") {
const iconObj = await actor.getIcon(loaderOpts);
photo = iconObj?.url?.href || "";
} else {
const iconObj = await actor.icon;
photo = iconObj?.url?.href || "";
}
} catch {
// No icon available
}
// Extract handle from actor URL
let handle = "";
try {
const actorUrl = new URL(url);
const username = actor.preferredUsername?.toString() || "";
if (username) {
handle = `@${username}@${actorUrl.hostname}`;
}
} catch {
// Invalid URL, keep handle empty
}
// Extract custom emoji from actor tags
const emojis = [];
try {
if (typeof actor.getTags === "function") {
const tags = await actor.getTags(loaderOpts);
for await (const tag of tags) {
if (tag instanceof Emoji) {
const shortcode = (tag.name?.toString() || "").replace(/^:|:$/g, "");
const iconUrl = tag.iconId?.href || "";
if (shortcode && iconUrl) {
emojis.push({ shortcode, url: iconUrl });
}
}
}
}
} catch {
// Emoji extraction failed — non-critical
}
// Bot detection — Service and Application actors are automated accounts
const bot = actor instanceof Service || actor instanceof Application;
// Replace custom emoji shortcodes in display name with <img> tags
const nameHtml = replaceCustomEmoji(name, emojis);
return { name, nameHtml, url, photo, handle, emojis, bot };
}
/**
* Extract timeline item data from Fedify Note/Article object
* @param {object} object - Fedify Note or Article object
* @param {object} options - Extraction options
* @param {object} [options.boostedBy] - Actor info for boosts
* @param {Date} [options.boostedAt] - Boost timestamp
* @param {object} [options.actorFallback] - Fedify actor to use when object.getAttributedTo() fails
* @param {object} [options.documentLoader] - Authenticated DocumentLoader for Secure Mode servers
* @returns {Promise<object>} Timeline item data with:
* - category: string[] — hashtag names (stripped of # prefix)
* - mentions: Array<{name: string, url: string}> — @mention entries with actor URLs
*/
export async function extractObjectData(object, options = {}) {
if (!object) {
throw new Error("Object is required");
}
const uid = object.id?.href || "";
const url = object.url?.href || uid;
// Determine type — use instanceof for Fedify vocab objects
let type = "note";
if (object instanceof Article) {
type = "article";
}
if (object instanceof Question) {
type = "question";
}
if (options.boostedBy) {
type = "boost";
}
// Extract content
const contentHtml = object.content?.toString() || "";
const contentText = object.source?.content?.toString() || contentHtml.replace(/<[^>]*>/g, "");
const content = {
text: contentText,
html: sanitizeContent(contentHtml)
};
// Extract name (articles only)
const name = type === "article" ? (object.name?.toString() || "") : "";
// Content warning / summary
const summary = object.summary?.toString() || "";
const sensitive = object.sensitive || false;
// Poll options (Question type)
let pollOptions = [];
let votersCount = 0;
let pollClosed = false;
let pollEndTime = "";
if (object instanceof Question) {
try {
const exclusive = [];
for await (const opt of object.getExclusiveOptions?.() || []) {
exclusive.push({
name: opt.name?.toString() || "",
votes: typeof opt.replies?.totalItems === "number" ? opt.replies.totalItems : 0,
});
}
const inclusive = [];
for await (const opt of object.getInclusiveOptions?.() || []) {
inclusive.push({
name: opt.name?.toString() || "",
votes: typeof opt.replies?.totalItems === "number" ? opt.replies.totalItems : 0,
});
}
pollOptions = exclusive.length > 0 ? exclusive : inclusive;
} catch {
// Poll options couldn't be extracted — show as regular post
}
votersCount = typeof object.voters === "number" ? object.voters : 0;
pollEndTime = object.endTime ? String(object.endTime) : "";
const closedValue = object.closed;
pollClosed = closedValue === true || (closedValue != null && closedValue !== false);
}
// Published date — store as UTC ISO string so cursor-based lookups always match.
// String(Temporal.Instant) preserves the original timezone offset (e.g. +01:00);
// normalizing via new Date() converts to Z-suffix UTC, matching decodeCursor output.
const published = object.published
? new Date(String(object.published)).toISOString()
: new Date().toISOString();
// Edited date — non-null when the post has been updated after publishing
const updated = object.updated ? String(object.updated) : "";
// Extract author — try multiple strategies in order of reliability
const loaderOpts = options.documentLoader ? { documentLoader: options.documentLoader } : {};
let authorObj = null;
try {
if (typeof object.getAttributedTo === "function") {
const attr = await object.getAttributedTo(loaderOpts);
authorObj = Array.isArray(attr) ? attr[0] : attr;
}
} catch {
// getAttributedTo() failed (unreachable, deleted, etc.)
}
// If getAttributedTo() returned nothing, use the actor from the wrapping activity
if (!authorObj && options.actorFallback) {
authorObj = options.actorFallback;
}
// Try direct property access for plain objects
if (!authorObj) {
authorObj = object.attribution || object.attributedTo || null;
}
let author;
if (authorObj) {
author = await extractActorInfo(authorObj, loaderOpts);
} else {
// Last resort: use attributionIds (non-fetching) to get at least a URL
const attrIds = object.attributionIds;
if (attrIds && attrIds.length > 0) {
const authorUrl = attrIds[0].href;
const parsedUrl = new URL(authorUrl);
const authorHostname = parsedUrl.hostname;
// Extract username from common URL patterns:
// /@username, /users/username, /ap/users/12345/
const pathname = parsedUrl.pathname;
let username = "";
const atPattern = pathname.match(/\/@([^/]+)/);
const usersPattern = pathname.match(/\/users\/([^/]+)/);
if (atPattern) {
username = atPattern[1];
} else if (usersPattern) {
username = usersPattern[1];
}
author = {
name: username || authorHostname,
url: authorUrl,
photo: "",
handle: username ? `@${username}@${authorHostname}` : "",
};
} else {
author = { name: "Unknown", url: "", photo: "", handle: "" };
}
}
// Extract tags — Fedify uses async getTags() which returns typed vocab objects.
// Hashtag → category[] (plain strings, # prefix stripped)
// Mention → mentions[] ({ name, url } objects for profile linking)
// Emoji → emojis[] ({ shortcode, url } for custom emoji rendering)
const category = [];
const mentions = [];
const emojis = [];
try {
if (typeof object.getTags === "function") {
const tags = await object.getTags(loaderOpts);
for await (const tag of tags) {
if (tag instanceof Hashtag) {
const tagName = tag.name?.toString().replace(/^#/, "") || "";
if (tagName) category.push(tagName);
} else if (tag instanceof Mention) {
// Strip leading @ from name (Fedify Mention names start with @)
const rawName = tag.name?.toString() || "";
const mentionName = rawName.startsWith("@") ? rawName.slice(1) : rawName;
// tag.href is a URL object — use .href to get the string
const mentionUrl = tag.href?.href || "";
if (mentionName) mentions.push({ name: mentionName, url: mentionUrl });
} else if (tag instanceof Emoji) {
// Custom emoji: name is ":shortcode:", icon is an Image with url
const shortcode = (tag.name?.toString() || "").replace(/^:|:$/g, "");
const iconUrl = tag.iconId?.href || "";
if (shortcode && iconUrl) {
emojis.push({ shortcode, url: iconUrl });
}
}
}
}
} catch {
// Tags extraction failed — non-critical
}
// Extract media attachments — Fedify uses async getAttachments()
const photo = [];
const video = [];
const audio = [];
try {
if (typeof object.getAttachments === "function") {
const attachments = await object.getAttachments(loaderOpts);
for await (const att of attachments) {
const mediaUrl = att.url?.href || "";
if (!mediaUrl) continue;
const mediaType = att.mediaType?.toLowerCase() || "";
if (mediaType.startsWith("image/")) {
photo.push({
url: mediaUrl,
alt: att.name?.toString() || "",
width: att.width || null,
height: att.height || null,
});
} else if (mediaType.startsWith("video/")) {
video.push(mediaUrl);
} else if (mediaType.startsWith("audio/")) {
audio.push(mediaUrl);
}
}
}
} catch {
// Attachment extraction failed — non-critical
}
// In-reply-to — Fedify uses replyTargetId (non-fetching)
const inReplyTo = object.replyTargetId?.href || "";
// Quote URL — Fedify reads quoteUrl / _misskey_quote / quoteUri
const quoteUrl = object.quoteUrl?.href || "";
// Interaction counts — extract from AP Collection objects
const counts = { replies: null, boosts: null, likes: null };
try {
const replies = await object.getReplies?.(loaderOpts);
if (replies?.totalItems != null) counts.replies = replies.totalItems;
} catch { /* ignore — collection may not exist */ }
try {
const likes = await object.getLikes?.(loaderOpts);
if (likes?.totalItems != null) counts.likes = likes.totalItems;
} catch { /* ignore */ }
try {
const shares = await object.getShares?.(loaderOpts);
if (shares?.totalItems != null) counts.boosts = shares.totalItems;
} catch { /* ignore */ }
// Replace custom emoji :shortcode: in content with inline <img> tags.
// Applied after sanitization — these are trusted emoji from the post's tags.
content.html = replaceCustomEmoji(content.html, emojis);
// Build base timeline item
const item = {
uid,
type,
url,
name,
content,
summary,
sensitive,
published,
updated,
author,
category,
mentions,
emojis,
photo,
video,
audio,
inReplyTo,
quoteUrl,
counts,
pollOptions,
votersCount,
pollClosed,
pollEndTime,
createdAt: new Date().toISOString()
};
// Add boost metadata if this is a boost
if (options.boostedBy) {
item.boostedBy = options.boostedBy;
item.boostedAt = options.boostedAt || new Date().toISOString();
item.originalUrl = url;
}
return item;
}