fix: improve timeline content for own posts (4 issues)

1. Empty content on bookmarks/likes/reposts: synthesize content from
   the interaction target URL (bookmark-of, like-of, repost-of) when
   the post has no body text

2. Hashtags not extracted: parse #hashtag patterns from content text
   and merge with explicit categories. Applies to both backfill
   (startup) and POST /api/v1/statuses (runtime)

3. Hashtag links rewritten: /categories/tag/ links (site-internal)
   are rewritten to /tags/tag (Mastodon convention) in the HTML
   content stored in ap_timeline

4. Relative media URLs resolved: photo/video/audio URLs like
   media/photos/... are resolved to absolute URLs using the site URL
This commit is contained in:
Ricardo
2026-03-21 10:34:11 +01:00
parent 2a4ac75c77
commit 01edd6e92e
3 changed files with 345 additions and 6 deletions

View File

@@ -0,0 +1,311 @@
/**
* Backfill ap_timeline from the posts collection.
*
* Runs on startup (idempotent — uses upsert by uid).
* Converts Micropub JF2 posts into ap_timeline format so they
* appear in Mastodon Client API timelines and profile views.
*/
/**
 * Backfill ap_timeline with published posts from the posts collection.
 *
 * Every post that is not a draft, is not marked deleted and has a URL is
 * converted into a timeline item and written with an upsert keyed on `uid`
 * (the post URL) using `$setOnInsert`, so existing timeline entries are never
 * overwritten and the function can be re-run safely.
 *
 * A post that cannot be converted or written is counted as skipped and
 * reported with `console.warn`; it does not abort the remaining posts.
 *
 * @param {object} collections - MongoDB collections (posts and ap_timeline are required, ap_profile is optional)
 * @returns {Promise<{ total: number, inserted: number, skipped: number }>}
 */
export async function backfillTimeline(collections) {
  const { posts, ap_timeline, ap_profile } = collections;
  if (!posts || !ap_timeline) {
    return { total: 0, inserted: 0, skipped: 0 };
  }

  // Get local profile for author info
  const profile = await ap_profile?.findOne({});
  const siteUrl = profile?.url?.replace(/\/$/, "") || "";
  const author = {
    name: profile?.name || "",
    url: profile?.url || "",
    photo: profile?.icon || "",
    handle: "",
  };

  // Fetch all published posts
  const allPosts = await posts
    .find({
      "properties.post-status": { $ne: "draft" },
      "properties.deleted": { $exists: false },
      "properties.url": { $exists: true },
    })
    .toArray();

  let inserted = 0;
  let skipped = 0;

  for (const post of allPosts) {
    const props = post.properties;
    if (!props?.url) {
      skipped++;
      continue;
    }
    const uid = props.url;

    // Check if already in timeline (fast path to avoid unnecessary upserts)
    const exists = await ap_timeline.findOne({ uid }, { projection: { _id: 1 } });
    if (exists) {
      skipped++;
      continue;
    }

    try {
      // Building the item is inside the try on purpose: a single malformed
      // post must not reject the whole backfill.
      const timelineItem = buildTimelineItem(props, uid, author, siteUrl);
      const result = await ap_timeline.updateOne(
        { uid },
        { $setOnInsert: timelineItem },
        { upsert: true },
      );
      if (result.upsertedCount > 0) {
        inserted++;
      } else {
        skipped++;
      }
    } catch (error) {
      // Best effort: keep going, but leave a trace instead of failing silently.
      console.warn(`[backfillTimeline] skipped ${uid}: ${error?.message ?? error}`);
      skipped++;
    }
  }

  return { total: allPosts.length, inserted, skipped };
}

/**
 * Convert the JF2 properties of one post into an ap_timeline document.
 *
 * @param {object} props - JF2 properties of the post
 * @param {string} uid - Post URL, used as both `uid` and `url`
 * @param {object} author - Author block shared by all backfilled items
 * @param {string} siteUrl - Site URL without trailing slash ("" when unknown)
 * @returns {object} Timeline item ready to be upserted
 */
function buildTimelineItem(props, uid, author, siteUrl) {
  // Interaction types (bookmark, like, repost) may not have body content,
  // so buildContent synthesizes it from the interaction target URL.
  const content = buildContent(props);

  // Explicit categories plus hashtags found inline in the text/HTML
  const categories = normalizeArray(props.category);
  const inlineHashtags = extractHashtags(`${content.text} ${content.html || ""}`);

  // One timestamp for both fields so they cannot drift apart when the post
  // carries no date of its own.
  const timestamp = props.published || props.date || new Date().toISOString();

  const timelineItem = {
    uid,
    url: uid,
    type: mapPostType(props["post-type"]),
    content: rewriteHashtagLinks(content, siteUrl),
    author,
    published: timestamp,
    createdAt: timestamp,
    visibility: "public",
    sensitive: false,
    category: mergeCategories(categories, inlineHashtags),
    photo: normalizeMediaArray(props.photo, siteUrl),
    video: normalizeMediaArray(props.video, siteUrl),
    audio: normalizeMediaArray(props.audio, siteUrl),
    readBy: [],
  };

  // Optional fields
  if (props.name) timelineItem.name = props.name;
  if (props.summary) timelineItem.summary = props.summary;
  if (props["in-reply-to"]) {
    timelineItem.inReplyTo = Array.isArray(props["in-reply-to"])
      ? props["in-reply-to"][0]
      : props["in-reply-to"];
  }

  return timelineItem;
}
// ─── Content Building ─────────────────────────────────────────────────────────
/**
 * Build `{ text, html }` content from JF2 properties.
 *
 * Existing body content always wins. When the post has none, content is
 * synthesized in this order: bookmark target, like target, repost target,
 * then the post name on its own. If nothing applies, the (empty) normalized
 * content is returned.
 *
 * Interaction targets may be a URL string, an object carrying a `url`, or an
 * array of either; only the first usable URL is shown.
 *
 * @param {object} props - JF2 properties of the post
 * @returns {{ text: string, html: string }}
 */
function buildContent(props) {
  const raw = normalizeContent(props.content);

  // If there's already content, use it
  if (raw.text || raw.html) return raw;

  // Synthesize content for interaction types
  const bookmarkOf = firstTargetUrl(props["bookmark-of"]);
  const likeOf = firstTargetUrl(props["like-of"]);
  const repostOf = firstTargetUrl(props["repost-of"]);
  const name = props.name;

  if (bookmarkOf) {
    const label = name || bookmarkOf;
    return {
      text: `Bookmarked: ${label}`,
      html: `<p>Bookmarked: <a href="${escapeHtml(bookmarkOf)}">${escapeHtml(label)}</a></p>`,
    };
  }

  if (likeOf) {
    return {
      text: `Liked: ${likeOf}`,
      html: `<p>Liked: <a href="${escapeHtml(likeOf)}">${escapeHtml(likeOf)}</a></p>`,
    };
  }

  if (repostOf) {
    const label = name || repostOf;
    return {
      text: `Reposted: ${label}`,
      html: `<p>Reposted: <a href="${escapeHtml(repostOf)}">${escapeHtml(label)}</a></p>`,
    };
  }

  // Article with title but no body
  if (name) {
    return { text: name, html: `<p>${escapeHtml(name)}</p>` };
  }

  return raw;
}

/**
 * Reduce an interaction target (string, `{ url }` object, or array of either)
 * to a single URL string; returns "" when no URL string can be found.
 */
function firstTargetUrl(value) {
  const first = Array.isArray(value) ? value[0] : value;
  if (typeof first === "string") return first;
  if (first && typeof first === "object") {
    const nested = Array.isArray(first.url) ? first.url[0] : first.url;
    return typeof nested === "string" ? nested : "";
  }
  return "";
}
/**
 * Normalize content from JF2 properties to { text, html } format.
 *
 * - A string is treated as plain text: it is returned as `text` and, HTML
 *   escaped and wrapped in a paragraph, as `html`.
 * - An object contributes `text` (falling back to `value`) and `html`. When
 *   it has no `html`, the plain text is escaped and wrapped the same way as
 *   a string so markup characters in it are never interpreted as HTML.
 * - Anything else (including missing content) yields empty strings.
 *
 * @param {*} content - JF2 `content` property
 * @returns {{ text: string, html: string }}
 */
function normalizeContent(content) {
  if (!content) return { text: "", html: "" };

  if (typeof content === "string") {
    return { text: content, html: `<p>${escapeHtml(content)}</p>` };
  }

  if (typeof content === "object") {
    const text = content.text || content.value || "";
    if (content.html) return { text, html: content.html };
    // No HTML supplied: derive it from the plain text instead of passing the
    // raw text through as if it were markup.
    return { text, html: text ? `<p>${escapeHtml(String(text))}</p>` : "" };
  }

  return { text: "", html: "" };
}
// ─── Hashtag Handling ─────────────────────────────────────────────────────────
/**
 * Extract hashtags from text content.
 *
 * A hashtag is a `#` at the start of the text or after whitespace, followed
 * by a letter or underscore and then any run of letters, combining marks,
 * digits or underscores. Letters are matched by Unicode property, so tags
 * such as `#café` or `#日本語` are captured whole rather than truncated or
 * dropped.
 *
 * @param {string} text - Text to scan
 * @returns {string[]} Unique tag names, lowercased, without the `#` prefix,
 *   in order of first appearance
 */
function extractHashtags(text) {
  if (!text) return [];
  const pattern = /(?:^|\s)#([\p{L}_][\p{L}\p{M}\p{N}_]*)/gu;
  const tags = new Set();
  // Read the capture group directly instead of trimming the full match.
  for (const [, tag] of text.matchAll(pattern)) {
    tags.add(tag.toLowerCase());
  }
  return [...tags];
}
/**
 * Combine explicit categories with inline hashtags.
 *
 * The explicit categories are kept first and unchanged. A hashtag is appended
 * only when it is not already known, where "known" means the lowercased
 * explicit categories plus every hashtag appended so far. The input arrays
 * are not modified.
 *
 * @param {string[]} categories - Explicit categories
 * @param {string[]} hashtags - Tag names found in the content
 * @returns {string[]} New array: categories followed by the new hashtags
 */
function mergeCategories(categories, hashtags) {
  const known = new Set(categories.map((entry) => entry.toLowerCase()));

  const additions = hashtags.filter((tag) => {
    // Set#add is a no-op for existing members, so a size change means "new".
    const sizeBefore = known.size;
    known.add(tag);
    return known.size > sizeBefore;
  });

  return [...categories, ...additions];
}
/**
 * Point category links in the HTML at /tags/<tag> instead of the
 * site-internal /categories/<tag>/ path, and mark them up as hashtags
 * (`class="hashtag" rel="tag"`).
 *
 * Both root-relative links and, when a site URL is known, absolute links on
 * that site are rewritten. Content without HTML is returned as-is; otherwise
 * a copy with the new `html` is returned.
 *
 * @param {{ text?: string, html?: string }} content - Content to rewrite
 * @param {string} siteUrl - Site URL without trailing slash ("" when unknown)
 * @returns {object} The same content object, or a copy with rewritten html
 */
function rewriteHashtagLinks(content, siteUrl) {
  const sourceHtml = content.html;
  if (!sourceHtml) return content;

  // Both passes produce the same attribute set for a captured tag name.
  const toTagHref = (_match, tag) => `href="${siteUrl}/tags/${tag}" class="hashtag" rel="tag"`;

  // Pass 1: root-relative category links
  const relativeCategoryHref = /href="\/categories\/([^/"]+)\/?"/g;
  let rewritten = sourceHtml.replace(relativeCategoryHref, toTagHref);

  // Pass 2: absolute category links on this site
  if (siteUrl) {
    const absoluteCategoryHref = new RegExp(`href="${escapeRegex(siteUrl)}/categories/([^/"]+)/?"`, "g");
    rewritten = rewritten.replace(absoluteCategoryHref, toTagHref);
  }

  return { ...content, html: rewritten };
}
// ─── Post Type Mapping ────────────────────────────────────────────────────────
/**
 * Translate a Micropub post-type into the timeline item type.
 *
 * Only two post types get a type of their own: "article" stays "article" and
 * "repost" becomes "boost". Every other value — photo, video, audio, reply,
 * like, bookmark, unknown or missing — is stored as a "note".
 *
 * @param {string} [postType] - Micropub post-type
 * @returns {"article" | "boost" | "note"}
 */
function mapPostType(postType) {
  if (postType === "article") return "article";
  if (postType === "repost") return "boost";
  return "note";
}
// ─── Normalization Helpers ────────────────────────────────────────────────────
/**
 * Coerce a value into an array of strings.
 *
 * Falsy input gives an empty array, an array has each element converted with
 * `String()`, and any other value becomes a one-element array.
 *
 * @param {*} value - Single value, array of values, or nothing
 * @returns {string[]}
 */
function normalizeArray(value) {
  if (!value) return [];
  const entries = Array.isArray(value) ? value : [value];
  return entries.map((entry) => String(entry));
}
/**
 * Normalize media values — resolves relative URLs to absolute.
 *
 * Strings are resolved directly; objects with a `url` are copied with the
 * resolved URL and all their other fields kept. Entries of any other kind
 * (including `null`, numbers and objects without a `url`) are dropped, as are
 * strings that resolve to an empty value.
 *
 * @param {*} value - String, object with url, or array thereof
 * @param {string} siteUrl - Base site URL for resolving relative paths
 * @returns {Array<string | object>} Normalized media entries
 */
function normalizeMediaArray(value, siteUrl) {
  if (!value) return [];
  const items = Array.isArray(value) ? value : [value];
  return items
    .map((item) => {
      if (typeof item === "string") return resolveUrl(item, siteUrl);
      // typeof null is "object", so rule null out before touching item.url
      if (item !== null && typeof item === "object" && item.url) {
        return { ...item, url: resolveUrl(item.url, siteUrl) };
      }
      return null;
    })
    .filter(Boolean);
}
/**
 * Resolve a URL against the site URL.
 *
 * - Empty or non-string values are returned unchanged.
 * - URLs that already carry a scheme (http:, https:, data:, ... in any
 *   letter case) are returned unchanged.
 * - Protocol-relative URLs (`//host/path`) get the site URL's scheme; when
 *   the site URL has none they are returned unchanged.
 * - Everything else is treated as a path and appended to the site URL with
 *   exactly one slash in between.
 *
 * @param {string} url - URL or path to resolve
 * @param {string} siteUrl - Base site URL ("" when unknown)
 * @returns {string} Resolved URL
 */
function resolveUrl(url, siteUrl) {
  if (!url || typeof url !== "string") return url;

  const scheme = /^[a-z][a-z0-9+.-]*:/i;
  if (scheme.test(url)) return url;

  // Tolerate a missing base and a base that still has its trailing slash
  const base = String(siteUrl ?? "").replace(/\/+$/, "");

  if (url.startsWith("//")) {
    const baseScheme = base.match(scheme)?.[0];
    return baseScheme ? `${baseScheme}${url}` : url;
  }

  return url.startsWith("/") ? `${base}${url}` : `${base}/${url}`;
}
/**
 * Escape HTML entities.
 *
 * Replaces `&`, `<`, `>`, `"` and `'` so the result is safe both as element
 * text and inside a quoted attribute value. `null`/`undefined` become an
 * empty string and other non-string values are converted with `String()`
 * instead of throwing.
 *
 * @param {*} str - Value to escape
 * @returns {string} Escaped text
 */
function escapeHtml(str) {
  if (str == null) return "";
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  // Single pass, so the "&" of an inserted entity is never escaped again
  return String(str).replace(/[&<>"']/g, (ch) => entities[ch]);
}
/**
 * Make a string safe to embed in a regular expression by prefixing every
 * regex metacharacter in it with a backslash.
 *
 * @param {string} str - Literal text
 * @returns {string} Text that matches itself when used in a RegExp source
 */
function escapeRegex(str) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return str.replace(metacharacters, (ch) => `\\${ch}`);
}

View File

@@ -219,6 +219,34 @@ router.post("/api/v1/statuses", async (req, res, next) => {
const handle = pluginOptions.handle || "user";
const actorUrl = profile?.url || `${publicationUrl}/users/${handle}`;
// Extract hashtags from status text and merge with any Micropub categories
const categories = data.properties.category || [];
const inlineHashtags = (statusText || "").match(/(?:^|\s)#([a-zA-Z_]\w*)/g);
if (inlineHashtags) {
const existing = new Set(categories.map((c) => c.toLowerCase()));
for (const match of inlineHashtags) {
const tag = match.trim().slice(1).toLowerCase();
if (!existing.has(tag)) {
existing.add(tag);
categories.push(tag);
}
}
}
// Resolve relative media URLs to absolute
const resolveMedia = (items) => {
if (!items || !items.length) return [];
return items.map((item) => {
if (typeof item === "string") {
return item.startsWith("http") ? item : `${publicationUrl.replace(/\/$/, "")}/${item.replace(/^\//, "")}`;
}
if (item?.url && !item.url.startsWith("http")) {
return { ...item, url: `${publicationUrl.replace(/\/$/, "")}/${item.url.replace(/^\//, "")}` };
}
return item;
});
};
const now = new Date().toISOString();
const timelineItem = await addTimelineItem(collections, {
uid: postUrl,
@@ -234,16 +262,16 @@ router.post("/api/v1/statuses", async (req, res, next) => {
createdAt: now,
author: {
name: profile?.name || handle,
url: actorUrl,
url: profile?.url || publicationUrl,
photo: profile?.icon || "",
handle: `@${handle}`,
emojis: [],
bot: false,
},
photo: data.properties.photo || [],
video: data.properties.video || [],
audio: data.properties.audio || [],
category: data.properties.category || [],
photo: resolveMedia(data.properties.photo || []),
video: resolveMedia(data.properties.video || []),
audio: resolveMedia(data.properties.audio || []),
category: categories,
counts: { replies: 0, boosts: 0, likes: 0 },
linkPreviews: [],
mentions: [],