diff --git a/lib/feeds/normalizer.js b/lib/feeds/normalizer.js
index ffd87e2..e010389 100644
--- a/lib/feeds/normalizer.js
+++ b/lib/feeds/normalizer.js
@@ -7,6 +7,28 @@ import crypto from "node:crypto";
 
 import sanitizeHtml from "sanitize-html";
 
+/**
+ * Extract image URLs from HTML content.
+ * Used as a fallback when no explicit photo/enclosure is provided.
+ * @param {string} html - HTML content (already sanitized)
+ * @returns {string[]} Unique image URLs, in order of first appearance
+ */
+function extractImagesFromHtml(html) {
+  if (!html) {
+    return [];
+  }
+  const urls = [];
+  const imgRegex = /<img[^>]+src=["']([^"']+)["'][^>]*>/gi;
+  let match;
+  while ((match = imgRegex.exec(html)) !== null) {
+    const src = match[1];
+    if (src && !urls.includes(src)) {
+      urls.push(src);
+    }
+  }
+  return urls;
+}
+
 /**
  * Parse a date string with fallback for non-standard formats
  * @param {string|Date} dateInput - Date string or Date object
@@ -232,6 +254,14 @@ export function normalizeItem(item, feedUrl, feedType) {
     }
   }
 
+  // Extract images from HTML content as fallback
+  if (!normalized.photo && normalized.content?.html) {
+    const extracted = extractImagesFromHtml(normalized.content.html);
+    if (extracted.length > 0) {
+      normalized.photo = extracted;
+    }
+  }
+
   return normalized;
 }
 
@@ -395,6 +425,14 @@ export function normalizeJsonFeedItem(item, feedUrl) {
     normalized["bookmark-of"] = [item.external_url];
   }
 
+  // Extract images from HTML content as fallback
+  if (!normalized.photo && normalized.content?.html) {
+    const extracted = extractImagesFromHtml(normalized.content.html);
+    if (extracted.length > 0) {
+      normalized.photo = extracted;
+    }
+  }
+
   return normalized;
 }
 
@@ -568,6 +606,14 @@ export function normalizeHfeedItem(entry, feedUrl) {
     normalized.syndication = properties.syndication;
   }
 
+  // Extract images from HTML content as fallback
+  if (!normalized.photo && normalized.content?.html) {
+    const extracted = extractImagesFromHtml(normalized.content.html);
+    if (extracted.length > 0) {
+      normalized.photo = extracted;
+    }
+  }
+
   return normalized;
 }
 
diff --git a/package.json b/package.json
index da64cd1..8ca2424 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@rmdes/indiekit-endpoint-microsub",
-  "version": "1.0.34",
+  "version": "1.0.35",
   "description": "Microsub endpoint for Indiekit. Enables subscribing to feeds and reading content using the Microsub protocol.",
   "keywords": [
     "indiekit",