indiekit-endpoint-activitypub/lib/timeline-store.js

/**
 * Timeline item extraction helpers
 * @module timeline-store
 */

import { Article, Application, Emoji, Hashtag, Mention, Question, Service } from "@fedify/fedify/vocab";
import sanitizeHtml from "sanitize-html";

/**
 * Sanitize HTML content for safe display
 * @param {string} html - Raw HTML content
 * @returns {string} Sanitized HTML
 */
export function sanitizeContent(html) {
  if (!html) return "";

  return sanitizeHtml(html, {
    allowedTags: [
      "p", "br", "a", "strong", "em", "ul", "ol", "li",
      "blockquote", "code", "pre", "h1", "h2", "h3", "h4", "h5", "h6",
      "span", "div", "img"
    ],
    allowedAttributes: {
      a: ["href", "rel", "class"],
      img: ["src", "alt", "class"],
      span: ["class"],
      div: ["class"]
    },
    allowedSchemes: ["http", "https", "mailto"],
    allowedSchemesByTag: {
      img: ["http", "https"]
    }
  });
}

/**
 * Replace custom emoji :shortcode: placeholders with inline <img> tags.
 * Applied AFTER sanitization — the <img> tags are controlled output from
 * trusted emoji data, not user-supplied HTML.
 *
 * @param {string} html - Content HTML (already sanitized)
 * @param {Array<{shortcode: string, url: string}>} emojis - Custom emoji data
 * @returns {string} HTML with shortcodes replaced by <img> tags
 */
export function replaceCustomEmoji(html, emojis) {
  if (!emojis?.length || !html) return html;
  let result = html;
  for (const { shortcode, url } of emojis) {
    // Validate URL is HTTP(S) only — reject data:, javascript:, etc.
    if (!url || (!url.startsWith("https://") && !url.startsWith("http://"))) continue;
    // Escape HTML special characters in URL and shortcode to prevent attribute injection
    const safeUrl = url.replace(/&/g, "&amp;").replace(/"/g, "&quot;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
    const safeShortcode = shortcode.replace(/&/g, "&amp;").replace(/"/g, "&quot;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
    const escaped = shortcode.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const pattern = new RegExp(`:${escaped}:`, "g");
    result = result.replace(
      pattern,
      `<img class="ap-custom-emoji" src="${safeUrl}" alt=":${safeShortcode}:" title=":${safeShortcode}:" draggable="false">`,
    );
  }
  return result;
}

/**
 * Extract actor information from Fedify Person/Application/Service object
 * @param {object} actor - Fedify actor object
 * @param {object} [options] - Options
 * @param {object} [options.documentLoader] - Authenticated DocumentLoader for Secure Mode servers
 * @returns {object} { name, url, photo, handle }
 */
export async function extractActorInfo(actor, options = {}) {
  if (!actor) {
    return {
      name: "Unknown",
      url: "",
      photo: "",
      handle: ""
    };
  }

  const rawName = actor.name?.toString() || actor.preferredUsername?.toString() || "Unknown";
  // Strip all HTML from actor names to prevent stored XSS
  const name = sanitizeHtml(rawName, { allowedTags: [], allowedAttributes: {} });
  const url = actor.id?.href || "";

  // Extract photo URL from icon (Fedify uses async getters)
  const loaderOpts = options.documentLoader ? { documentLoader: options.documentLoader } : {};
  let photo = "";
  try {
    if (typeof actor.getIcon === "function") {
      const iconObj = await actor.getIcon(loaderOpts);
      photo = iconObj?.url?.href || "";
    } else {
      const iconObj = await actor.icon;
      photo = iconObj?.url?.href || "";
    }
  } catch {
    // No icon available
  }

  // Extract handle from actor URL
  let handle = "";
  try {
    const actorUrl = new URL(url);
    const username = actor.preferredUsername?.toString() || "";
    if (username) {
      handle = `@${username}@${actorUrl.hostname}`;
    }
  } catch {
    // Invalid URL, keep handle empty
  }

  // Extract custom emoji from actor tags
  const emojis = [];
  try {
    if (typeof actor.getTags === "function") {
      const tags = await actor.getTags(loaderOpts);
      for await (const tag of tags) {
        if (tag instanceof Emoji) {
          const shortcode = (tag.name?.toString() || "").replace(/^:|:$/g, "");
          const iconUrl = tag.iconId?.href || "";
          if (shortcode && iconUrl) {
            emojis.push({ shortcode, url: iconUrl });
          }
        }
      }
    }
  } catch {
    // Emoji extraction failed — non-critical
  }

  // Bot detection — Service and Application actors are automated accounts
  const bot = actor instanceof Service || actor instanceof Application;

  // Replace custom emoji shortcodes in display name with <img> tags
  const nameHtml = replaceCustomEmoji(name, emojis);

  return { name, nameHtml, url, photo, handle, emojis, bot };
}

/**
 * Extract timeline item data from Fedify Note/Article object
 * @param {object} object - Fedify Note or Article object
 * @param {object} options - Extraction options
 * @param {object} [options.boostedBy] - Actor info for boosts
 * @param {Date} [options.boostedAt] - Boost timestamp
 * @param {object} [options.actorFallback] - Fedify actor to use when object.getAttributedTo() fails
 * @param {object} [options.documentLoader] - Authenticated DocumentLoader for Secure Mode servers
 * @returns {Promise<object>} Timeline item data with:
 *   - category: string[] — hashtag names (stripped of # prefix)
 *   - mentions: Array<{name: string, url: string}> — @mention entries with actor URLs
 */
export async function extractObjectData(object, options = {}) {
  if (!object) {
    throw new Error("Object is required");
  }

  const uid = object.id?.href || "";
  const url = object.url?.href || uid;

  // Determine type — use instanceof for Fedify vocab objects
  let type = "note";
  if (object instanceof Article) {
    type = "article";
  }
  if (object instanceof Question) {
    type = "question";
  }
  if (options.boostedBy) {
    type = "boost";
  }

  // Extract content
  const contentHtml = object.content?.toString() || "";
  const contentText = object.source?.content?.toString() || contentHtml.replace(/<[^>]*>/g, "");

  const content = {
    text: contentText,
    html: sanitizeContent(contentHtml)
  };

  // Extract name (articles only)
  const name = type === "article" ? (object.name?.toString() || "") : "";

  // Content warning / summary
  const summary = object.summary?.toString() || "";
  const sensitive = object.sensitive || false;

  // Poll options (Question type)
  let pollOptions = [];
  let votersCount = 0;
  let pollClosed = false;
  let pollEndTime = "";

  if (object instanceof Question) {
    try {
      const exclusive = [];
      for await (const opt of object.getExclusiveOptions?.() || []) {
        exclusive.push({
          name: opt.name?.toString() || "",
          votes: typeof opt.replies?.totalItems === "number" ? opt.replies.totalItems : 0,
        });
      }
      const inclusive = [];
      for await (const opt of object.getInclusiveOptions?.() || []) {
        inclusive.push({
          name: opt.name?.toString() || "",
          votes: typeof opt.replies?.totalItems === "number" ? opt.replies.totalItems : 0,
        });
      }
      pollOptions = exclusive.length > 0 ? exclusive : inclusive;
    } catch {
      // Poll options couldn't be extracted — show as regular post
    }

    votersCount = typeof object.voters === "number" ? object.voters : 0;
    pollEndTime = object.endTime ? String(object.endTime) : "";
    const closedValue = object.closed;
    pollClosed = closedValue === true || (closedValue != null && closedValue !== false);
  }

  // Published date — store as ISO string per Indiekit convention
  const published = object.published
    ? String(object.published)
    : new Date().toISOString();

  // Edited date — non-null when the post has been updated after publishing
  const updated = object.updated ? String(object.updated) : "";

  // Extract author — try multiple strategies in order of reliability
  const loaderOpts = options.documentLoader ? { documentLoader: options.documentLoader } : {};
  let authorObj = null;
  try {
    if (typeof object.getAttributedTo === "function") {
      const attr = await object.getAttributedTo(loaderOpts);
      authorObj = Array.isArray(attr) ? attr[0] : attr;
    }
  } catch {
    // getAttributedTo() failed (unreachable, deleted, etc.)
  }
  // If getAttributedTo() returned nothing, use the actor from the wrapping activity
  if (!authorObj && options.actorFallback) {
    authorObj = options.actorFallback;
  }
  // Try direct property access for plain objects
  if (!authorObj) {
    authorObj = object.attribution || object.attributedTo || null;
  }

  let author;
  if (authorObj) {
    author = await extractActorInfo(authorObj, loaderOpts);
  } else {
    // Last resort: use attributionIds (non-fetching) to get at least a URL
    const attrIds = object.attributionIds;
    if (attrIds && attrIds.length > 0) {
      const authorUrl = attrIds[0].href;
      const parsedUrl = new URL(authorUrl);
      const authorHostname = parsedUrl.hostname;
      // Extract username from common URL patterns:
      //   /@username, /users/username, /ap/users/12345/
      const pathname = parsedUrl.pathname;
      let username = "";
      const atPattern = pathname.match(/\/@([^/]+)/);
      const usersPattern = pathname.match(/\/users\/([^/]+)/);
      if (atPattern) {
        username = atPattern[1];
      } else if (usersPattern) {
        username = usersPattern[1];
      }
      author = {
        name: username || authorHostname,
        url: authorUrl,
        photo: "",
        handle: username ? `@${username}@${authorHostname}` : "",
      };
    } else {
      author = { name: "Unknown", url: "", photo: "", handle: "" };
    }
  }

  // Extract tags — Fedify uses async getTags() which returns typed vocab objects.
  // Hashtag → category[] (plain strings, # prefix stripped)
  // Mention → mentions[] ({ name, url } objects for profile linking)
  // Emoji → emojis[] ({ shortcode, url } for custom emoji rendering)
  const category = [];
  const mentions = [];
  const emojis = [];
  try {
    if (typeof object.getTags === "function") {
      const tags = await object.getTags(loaderOpts);
      for await (const tag of tags) {
        if (tag instanceof Hashtag) {
          const tagName = tag.name?.toString().replace(/^#/, "") || "";
          if (tagName) category.push(tagName);
        } else if (tag instanceof Mention) {
          // Strip leading @ from name (Fedify Mention names start with @)
          const rawName = tag.name?.toString() || "";
          const mentionName = rawName.startsWith("@") ? rawName.slice(1) : rawName;
          // tag.href is a URL object — use .href to get the string
          const mentionUrl = tag.href?.href || "";
          if (mentionName) mentions.push({ name: mentionName, url: mentionUrl });
        } else if (tag instanceof Emoji) {
          // Custom emoji: name is ":shortcode:", icon is an Image with url
          const shortcode = (tag.name?.toString() || "").replace(/^:|:$/g, "");
          const iconUrl = tag.iconId?.href || "";
          if (shortcode && iconUrl) {
            emojis.push({ shortcode, url: iconUrl });
          }
        }
      }
    }
  } catch {
    // Tags extraction failed — non-critical
  }

  // Extract media attachments — Fedify uses async getAttachments()
  const photo = [];
  const video = [];
  const audio = [];

  try {
    if (typeof object.getAttachments === "function") {
      const attachments = await object.getAttachments(loaderOpts);
      for await (const att of attachments) {
        const mediaUrl = att.url?.href || "";
        if (!mediaUrl) continue;

        const mediaType = att.mediaType?.toLowerCase() || "";

        if (mediaType.startsWith("image/")) {
          photo.push({
            url: mediaUrl,
            alt: att.name?.toString() || "",
            width: att.width || null,
            height: att.height || null,
          });
        } else if (mediaType.startsWith("video/")) {
          video.push(mediaUrl);
        } else if (mediaType.startsWith("audio/")) {
          audio.push(mediaUrl);
        }
      }
    }
  } catch {
    // Attachment extraction failed — non-critical
  }

  // In-reply-to — Fedify uses replyTargetId (non-fetching)
  const inReplyTo = object.replyTargetId?.href || "";

  // Quote URL — Fedify reads quoteUrl / _misskey_quote / quoteUri
  const quoteUrl = object.quoteUrl?.href || "";

  // Interaction counts — not fetched at ingest time. The three collection
  // fetches (getReplies, getLikes, getShares) each trigger an HTTP round-trip
  // for counts that are ephemeral and stale moments after fetching. Removed
  // per audit M11 to save 3 network calls per inbound activity.
  const counts = { replies: null, boosts: null, likes: null };

  // Replace custom emoji :shortcode: in content with inline <img> tags.
  // Applied after sanitization — these are trusted emoji from the post's tags.
  content.html = replaceCustomEmoji(content.html, emojis);

  // Build base timeline item
  const item = {
    uid,
    type,
    url,
    name,
    content,
    summary,
    sensitive,
    published,
    updated,
    author,
    category,
    mentions,
    emojis,
    photo,
    video,
    audio,
    inReplyTo,
    quoteUrl,
    counts,
    pollOptions,
    votersCount,
    pollClosed,
    pollEndTime,
    createdAt: new Date().toISOString()
  };

  // Add boost metadata if this is a boost
  if (options.boostedBy) {
    item.boostedBy = options.boostedBy;
    item.boostedAt = options.boostedAt || new Date().toISOString();
    item.originalUrl = url;
  }

  return item;
}