fix: extract images from HTML content at read time for existing items

The normalizer fix (1.0.35) only applies to newly ingested items.
Existing items in MongoDB lack photo arrays because dedup prevents
re-processing. Add the same extractImagesFromHtml() fallback in
transformToJf2() so images are extracted from content.html at read
time, making existing xkcd comics and photo posts display immediately.
This commit is contained in:
Ricardo
2026-02-25 17:20:53 +01:00
parent ee2cd26208
commit cdd4a58015
2 changed files with 30 additions and 1 deletions

View File

@@ -12,6 +12,27 @@ import {
parseLimit, parseLimit,
} from "../utils/pagination.js"; } from "../utils/pagination.js";
/**
 * Extract image URLs from HTML content (fallback for items without explicit photos).
 *
 * Only the real `src` attribute of each closed `<img ...>` tag is considered;
 * look-alike attributes such as `data-src` are ignored. URLs are returned in
 * document order with duplicates removed.
 *
 * @param {string} html - HTML content
 * @returns {string[]} Array of unique image URLs (empty when there is no usable input)
 */
function extractImagesFromHtml(html) {
  if (typeof html !== "string" || html === "") {
    return [];
  }

  // `\ssrc` requires whitespace before the attribute name so that `data-src=`
  // (which ends in "src=") is never mistaken for `src=`. Each quote style gets
  // its own alternative so the value may contain the other quote character.
  const imgRegex = /<img\b[^>]*?\ssrc\s*=\s*(?:"([^"]*)"|'([^']*)')[^>]*>/gi;

  // A Set keeps first-seen order and makes de-duplication O(1) per URL.
  const urls = new Set();

  for (const match of html.matchAll(imgRegex)) {
    const rawSrc = match[1] ?? match[2];
    if (!rawSrc) {
      continue;
    }
    // Attribute values are HTML-encoded; `&amp;` in a query string must be
    // turned back into `&` to obtain the actual URL.
    urls.add(rawSrc.replace(/&amp;/g, "&"));
  }

  return [...urls];
}
/** /**
* Get items collection from application * Get items collection from application
* @param {object} application - Indiekit application * @param {object} application - Indiekit application
@@ -201,6 +222,14 @@ function transformToJf2(item, userId) {
const videos = normalizeMediaArray(item.video); const videos = normalizeMediaArray(item.video);
const audios = normalizeMediaArray(item.audio); const audios = normalizeMediaArray(item.audio);
// Fallback: extract images from HTML content if no explicit photos
if (photos.length === 0 && item.content?.html) {
const extracted = extractImagesFromHtml(item.content.html);
if (extracted.length > 0) {
photos.push(...extracted);
}
}
if (photos.length > 0) jf2.photo = photos; if (photos.length > 0) jf2.photo = photos;
if (videos.length > 0) jf2.video = videos; if (videos.length > 0) jf2.video = videos;
if (audios.length > 0) jf2.audio = audios; if (audios.length > 0) jf2.audio = audios;

View File

@@ -1,6 +1,6 @@
{ {
"name": "@rmdes/indiekit-endpoint-microsub", "name": "@rmdes/indiekit-endpoint-microsub",
"version": "1.0.35", "version": "1.0.36",
"description": "Microsub endpoint for Indiekit. Enables subscribing to feeds and reading content using the Microsub protocol.", "description": "Microsub endpoint for Indiekit. Enables subscribing to feeds and reading content using the Microsub protocol.",
"keywords": [ "keywords": [
"indiekit", "indiekit",