From cdd4a5801529bc2d62f80c0194a2bca0662c60d0 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Wed, 25 Feb 2026 17:20:53 +0100 Subject: [PATCH] fix: extract images from HTML content at read time for existing items The normalizer fix (1.0.35) only applies to newly ingested items. Existing items in MongoDB lack photo arrays because dedup prevents re-processing. Add the same extractImagesFromHtml() fallback in transformToJf2() so images are extracted from content.html at read time, making existing xkcd comics and photo posts display immediately. --- lib/storage/items.js | 29 +++++++++++++++++++++++++++++ package.json | 2 +- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/lib/storage/items.js b/lib/storage/items.js index b85bdf6..227e2a2 100644 --- a/lib/storage/items.js +++ b/lib/storage/items.js @@ -12,6 +12,27 @@ import { parseLimit, } from "../utils/pagination.js"; +/** + * Extract image URLs from HTML content (fallback for items without explicit photos) + * @param {string} html - HTML content + * @returns {string[]} Array of image URLs + */ +function extractImagesFromHtml(html) { + if (!html) { + return []; + } + const urls = []; + const imgRegex = /<img[^>]+src=["']([^"']+)["'][^>]*>/gi; + let match; + while ((match = imgRegex.exec(html)) !== null) { + const src = match[1]; + if (src && !urls.includes(src)) { + urls.push(src); + } + } + return urls; +} + /** * Get items collection from application * @param {object} application - Indiekit application @@ -201,6 +222,14 @@ function transformToJf2(item, userId) { const videos = normalizeMediaArray(item.video); const audios = normalizeMediaArray(item.audio); + // Fallback: extract images from HTML content if no explicit photos + if (photos.length === 0 && item.content?.html) { + const extracted = extractImagesFromHtml(item.content.html); + if (extracted.length > 0) { + photos.push(...extracted); + } + } + if (photos.length > 0) jf2.photo = photos; if (videos.length > 0) jf2.video = videos; if (audios.length > 0) 
jf2.audio = audios; diff --git a/package.json b/package.json index 8ca2424..1dfd9b7 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@rmdes/indiekit-endpoint-microsub", - "version": "1.0.35", + "version": "1.0.36", "description": "Microsub endpoint for Indiekit. Enables subscribing to feeds and reading content using the Microsub protocol.", "keywords": [ "indiekit",