mirror of
https://github.com/svemagie/indiekit-endpoint-microsub.git
synced 2026-04-02 15:35:00 +02:00
fix: extract images from HTML content at read time for existing items
The normalizer fix (1.0.35) only applies to newly ingested items. Existing items in MongoDB lack photo arrays because dedup prevents re-processing. Add the same extractImagesFromHtml() fallback in transformToJf2() so images are extracted from content.html at read time, making existing xkcd comics and photo posts display immediately.
This commit is contained in:
@@ -12,6 +12,27 @@ import {
|
||||
parseLimit,
|
||||
} from "../utils/pagination.js";
|
||||
|
||||
/**
 * Extract image URLs from HTML content (fallback for items without explicit photos).
 *
 * Only the real `src` attribute of each `<img>` tag is considered: the attribute
 * name must be preceded by whitespace, so look-alikes such as `data-src` or
 * `data-lazy-src` are ignored. Both single- and double-quoted values are
 * supported, and a value may contain the other quote character.
 * URLs are returned in document order with duplicates removed.
 *
 * @param {string} html - HTML content
 * @returns {string[]} Array of unique image URLs (empty if none or if input is not a non-empty string)
 */
function extractImagesFromHtml(html) {
  if (!html || typeof html !== "string") {
    return [];
  }

  // `\ssrc` anchors on an attribute boundary so `data-src="..."` cannot match.
  // The two alternatives tie the closing quote to the opening one, so a URL
  // like "it's.png" is captured whole instead of being cut at the apostrophe.
  const imgRegex = /<img\b[^>]*?\ssrc\s*=\s*(?:"([^"]*)"|'([^']*)')[^>]*>/gi;

  // A Set keeps insertion order and gives O(1) duplicate checks.
  const urls = new Set();
  for (const match of html.matchAll(imgRegex)) {
    const src = (match[1] ?? match[2] ?? "").trim();
    if (src) {
      urls.add(src);
    }
  }

  return [...urls];
}
|
||||
|
||||
/**
|
||||
* Get items collection from application
|
||||
* @param {object} application - Indiekit application
|
||||
@@ -201,6 +222,14 @@ function transformToJf2(item, userId) {
|
||||
const videos = normalizeMediaArray(item.video);
|
||||
const audios = normalizeMediaArray(item.audio);
|
||||
|
||||
// Fallback: extract images from HTML content if no explicit photos
|
||||
if (photos.length === 0 && item.content?.html) {
|
||||
const extracted = extractImagesFromHtml(item.content.html);
|
||||
if (extracted.length > 0) {
|
||||
photos.push(...extracted);
|
||||
}
|
||||
}
|
||||
|
||||
if (photos.length > 0) jf2.photo = photos;
|
||||
if (videos.length > 0) jf2.video = videos;
|
||||
if (audios.length > 0) jf2.audio = audios;
|
||||
|
||||
Reference in New Issue
Block a user