From 3ca920089b575d9132cf032ae465f3d0d7e76b68 Mon Sep 17 00:00:00 2001 From: Sven Date: Sat, 14 Mar 2026 09:27:22 +0100 Subject: [PATCH] fix: improve microsub feed discovery via <link> tags When a bookmarked URL is an HTML page whose feed is not at a common path (/feed, /rss.xml etc.), fetchAndParseFeed would throw and store no items in microsub_items. Sites like econsoc.mpifg.de or signal.org post pages advertise their feed via a standard <link rel="alternate"> element, which discoverFeeds() already parses but was never called from the fetch/parse pipeline. Now, before probing common paths, fetchAndParseFeed calls discoverFeeds() on the fetched HTML and uses any typed RSS/Atom/JSONFeed link it finds. Common-path probing remains as the final fallback. Co-Authored-By: Claude Sonnet 4.6 --- package.json | 4 +- scripts/patch-microsub-feed-discovery.mjs | 149 ++++++++++++++++++++++ 2 files changed, 151 insertions(+), 2 deletions(-) create mode 100644 scripts/patch-microsub-feed-discovery.mjs diff --git a/package.json b/package.json index a8617228..76ca5e56 100644 --- a/package.json +++ b/package.json @@ -4,8 +4,8 @@ "description": "", "main": "index.js", "scripts": { - "postinstall": "xattr -w com.apple.fileprovider.ignore#P 1 node_modules 2>/dev/null || true && node scripts/patch-lightningcss.mjs && node scripts/patch-endpoint-media-scope.mjs && node scripts/patch-endpoint-media-sharp-runtime.mjs && node scripts/patch-frontend-sharp-runtime.mjs && node scripts/patch-endpoint-files-upload-route.mjs && node scripts/patch-endpoint-files-upload-locales.mjs && node scripts/patch-endpoint-activitypub-locales.mjs && node scripts/patch-endpoint-activitypub-private-url-docloader.mjs && node scripts/patch-endpoint-homepage-locales.mjs && node scripts/patch-endpoint-homepage-identity-defaults.mjs && node scripts/patch-federation-unlisted-guards.mjs && node scripts/patch-endpoint-micropub-where-note-visibility.mjs && node scripts/patch-endpoint-posts-ai-fields.mjs && node
scripts/patch-endpoint-posts-ai-cleanup.mjs && node scripts/patch-endpoint-podroll-opml-upload.mjs && node scripts/patch-preset-eleventy-ai-frontmatter.mjs && node scripts/patch-micropub-ai-block-resync.mjs && node scripts/patch-frontend-serviceworker-file.mjs && node scripts/patch-endpoint-comments-locales.mjs && node scripts/patch-conversations-collection-guards.mjs && node scripts/patch-conversations-mastodon-disconnect.mjs && node scripts/patch-indiekit-routes-rate-limits.mjs && node scripts/patch-indiekit-error-production-stack.mjs && node scripts/patch-indieauth-devmode-guard.mjs && node scripts/patch-listening-endpoint-runtime-guards.mjs && node scripts/patch-endpoint-github-changelog-categories.mjs && node scripts/patch-microsub-reader-ap-dispatch.mjs && node scripts/patch-endpoint-blogroll-feeds-alias.mjs && node scripts/patch-endpoint-posts-uid-lookup.mjs && node scripts/patch-endpoint-posts-prefill-url.mjs", - "serve":"export NODE_ENV=${NODE_ENV:-production} INDIEKIT_DEBUG=${INDIEKIT_DEBUG:-0} && node scripts/preflight-production-security.mjs && node scripts/preflight-mongo-connection.mjs && node scripts/preflight-activitypub-rsa-key.mjs && node scripts/preflight-activitypub-profile-urls.mjs && node scripts/patch-lightningcss.mjs && node scripts/patch-endpoint-media-scope.mjs && node scripts/patch-endpoint-media-sharp-runtime.mjs && node scripts/patch-frontend-sharp-runtime.mjs && node scripts/patch-endpoint-files-upload-route.mjs && node scripts/patch-endpoint-files-upload-locales.mjs && node scripts/patch-endpoint-activitypub-locales.mjs && node scripts/patch-endpoint-activitypub-private-url-docloader.mjs && node scripts/patch-endpoint-homepage-locales.mjs && node scripts/patch-endpoint-homepage-identity-defaults.mjs && node scripts/patch-federation-unlisted-guards.mjs && node scripts/patch-endpoint-micropub-where-note-visibility.mjs && node scripts/patch-endpoint-posts-ai-fields.mjs && node scripts/patch-endpoint-posts-ai-cleanup.mjs && node 
scripts/patch-endpoint-podroll-opml-upload.mjs && node scripts/patch-preset-eleventy-ai-frontmatter.mjs && node scripts/patch-micropub-ai-block-resync.mjs && node scripts/patch-frontend-serviceworker-file.mjs && node scripts/patch-endpoint-comments-locales.mjs && node scripts/patch-conversations-collection-guards.mjs && node scripts/patch-conversations-mastodon-disconnect.mjs && node scripts/patch-indiekit-routes-rate-limits.mjs && node scripts/patch-indiekit-error-production-stack.mjs && node scripts/patch-indieauth-devmode-guard.mjs && node scripts/patch-listening-endpoint-runtime-guards.mjs && node scripts/patch-endpoint-github-changelog-categories.mjs && node scripts/patch-microsub-reader-ap-dispatch.mjs && node scripts/patch-endpoint-blogroll-feeds-alias.mjs && node scripts/patch-endpoint-posts-uid-lookup.mjs && node scripts/patch-endpoint-posts-prefill-url.mjs && node node_modules/@indiekit/indiekit/bin/cli.js serve --config indiekit.config.mjs", + "postinstall": "xattr -w com.apple.fileprovider.ignore#P 1 node_modules 2>/dev/null || true && node scripts/patch-lightningcss.mjs && node scripts/patch-endpoint-media-scope.mjs && node scripts/patch-endpoint-media-sharp-runtime.mjs && node scripts/patch-frontend-sharp-runtime.mjs && node scripts/patch-endpoint-files-upload-route.mjs && node scripts/patch-endpoint-files-upload-locales.mjs && node scripts/patch-endpoint-activitypub-locales.mjs && node scripts/patch-endpoint-activitypub-private-url-docloader.mjs && node scripts/patch-endpoint-homepage-locales.mjs && node scripts/patch-endpoint-homepage-identity-defaults.mjs && node scripts/patch-federation-unlisted-guards.mjs && node scripts/patch-endpoint-micropub-where-note-visibility.mjs && node scripts/patch-endpoint-posts-ai-fields.mjs && node scripts/patch-endpoint-posts-ai-cleanup.mjs && node scripts/patch-endpoint-podroll-opml-upload.mjs && node scripts/patch-preset-eleventy-ai-frontmatter.mjs && node scripts/patch-micropub-ai-block-resync.mjs && node 
scripts/patch-frontend-serviceworker-file.mjs && node scripts/patch-endpoint-comments-locales.mjs && node scripts/patch-conversations-collection-guards.mjs && node scripts/patch-conversations-mastodon-disconnect.mjs && node scripts/patch-indiekit-routes-rate-limits.mjs && node scripts/patch-indiekit-error-production-stack.mjs && node scripts/patch-indieauth-devmode-guard.mjs && node scripts/patch-listening-endpoint-runtime-guards.mjs && node scripts/patch-endpoint-github-changelog-categories.mjs && node scripts/patch-microsub-reader-ap-dispatch.mjs && node scripts/patch-endpoint-blogroll-feeds-alias.mjs && node scripts/patch-endpoint-posts-uid-lookup.mjs && node scripts/patch-endpoint-posts-prefill-url.mjs && node scripts/patch-microsub-feed-discovery.mjs", + "serve":"export NODE_ENV=${NODE_ENV:-production} INDIEKIT_DEBUG=${INDIEKIT_DEBUG:-0} && node scripts/preflight-production-security.mjs && node scripts/preflight-mongo-connection.mjs && node scripts/preflight-activitypub-rsa-key.mjs && node scripts/preflight-activitypub-profile-urls.mjs && node scripts/patch-lightningcss.mjs && node scripts/patch-endpoint-media-scope.mjs && node scripts/patch-endpoint-media-sharp-runtime.mjs && node scripts/patch-frontend-sharp-runtime.mjs && node scripts/patch-endpoint-files-upload-route.mjs && node scripts/patch-endpoint-files-upload-locales.mjs && node scripts/patch-endpoint-activitypub-locales.mjs && node scripts/patch-endpoint-activitypub-private-url-docloader.mjs && node scripts/patch-endpoint-homepage-locales.mjs && node scripts/patch-endpoint-homepage-identity-defaults.mjs && node scripts/patch-federation-unlisted-guards.mjs && node scripts/patch-endpoint-micropub-where-note-visibility.mjs && node scripts/patch-endpoint-posts-ai-fields.mjs && node scripts/patch-endpoint-posts-ai-cleanup.mjs && node scripts/patch-endpoint-podroll-opml-upload.mjs && node scripts/patch-preset-eleventy-ai-frontmatter.mjs && node scripts/patch-micropub-ai-block-resync.mjs && node 
scripts/patch-frontend-serviceworker-file.mjs && node scripts/patch-endpoint-comments-locales.mjs && node scripts/patch-conversations-collection-guards.mjs && node scripts/patch-conversations-mastodon-disconnect.mjs && node scripts/patch-indiekit-routes-rate-limits.mjs && node scripts/patch-indiekit-error-production-stack.mjs && node scripts/patch-indieauth-devmode-guard.mjs && node scripts/patch-listening-endpoint-runtime-guards.mjs && node scripts/patch-endpoint-github-changelog-categories.mjs && node scripts/patch-microsub-reader-ap-dispatch.mjs && node scripts/patch-endpoint-blogroll-feeds-alias.mjs && node scripts/patch-endpoint-posts-uid-lookup.mjs && node scripts/patch-endpoint-posts-prefill-url.mjs && node scripts/patch-microsub-feed-discovery.mjs && node node_modules/@indiekit/indiekit/bin/cli.js serve --config indiekit.config.mjs", "test": "echo \"Error: no test specified\" && exit 1" }, "keywords": [], diff --git a/scripts/patch-microsub-feed-discovery.mjs b/scripts/patch-microsub-feed-discovery.mjs new file mode 100644 index 00000000..4dafadb2 --- /dev/null +++ b/scripts/patch-microsub-feed-discovery.mjs @@ -0,0 +1,149 @@ +/** + * Patch: improve feed discovery in fetchAndParseFeed + * + * Problem: when a bookmarked URL is an HTML page (article, site root, etc.), + * fetchAndParseFeed only tries a fixed list of common paths (/feed, /rss.xml …). + * This misses sites whose feed URL is advertised via a + * <link rel="alternate" type="application/rss+xml" href="..."> + * tag in the page <head>. For example, econsoc.mpifg.de, signal.org blog + * pages, and Substack article URLs all work through <link> discovery but + * fail the common-path probe. + * + * Solution: before falling back to tryCommonFeedPaths, call discoverFeeds() + * (which already parses <link rel="alternate"> tags) on the fetched HTML + * content. If it finds a typed RSS/Atom/JSONFeed link, use that URL. + * Only if link-based discovery also fails do we probe common paths.
/**
 * Postinstall patcher: teaches the installed microsub fetcher to look for
 * feed links advertised in a page's HTML before probing common feed paths.
 *
 * For each candidate copy of lib/feeds/fetcher.js that exists, the script
 * swaps `oldSnippet` (the stock "ActivityPub or unknown" fallback branch) for
 * `newSnippet`, which first calls discoverFeeds() from ./hfeed.js and only
 * falls back to tryCommonFeedPaths() when no rss/atom/jsonfeed link is found.
 *
 * The run is idempotent: a file that already contains the spec's `marker`
 * text is left alone. A missing file or an unmatched snippet is logged and
 * skipped; neither aborts the install.
 */
import { access, readFile, writeFile } from "node:fs/promises";

/**
 * Reports whether a path is accessible.
 *
 * @param {string} p - Path to check.
 * @returns {Promise<boolean>} true when access() resolves, false when it rejects.
 */
async function exists(p) {
  try {
    await access(p);
    return true;
  } catch {
    return false;
  }
}

/**
 * Applies one patch spec to a file's text. Pure: performs no I/O.
 *
 * @param {string} source - Current contents of the target file.
 * @param {{marker: string, oldSnippet: string, newSnippet: string}} spec - Patch description.
 * @returns {{status: "already-patched" | "unmatched" | "patched", updated: string}}
 *   `updated` equals `source` unless `status` is "patched".
 */
function applyPatch(source, spec) {
  if (source.includes(spec.marker)) {
    return { status: "already-patched", updated: source };
  }

  if (!source.includes(spec.oldSnippet)) {
    return { status: "unmatched", updated: source };
  }

  // A replacer function inserts newSnippet verbatim. A plain replacement
  // string would expand "$&", "$'", "$`" and "$$", which is unsafe when the
  // replacement is source code.
  const updated = source.replace(spec.oldSnippet, () => spec.newSnippet);
  return { status: "patched", updated };
}

/**
 * Builds the closing one-line summary for the run.
 *
 * @param {{checked: number, patched: number, unmatched: number}} counts - Per-run tallies.
 * @returns {string} Summary message.
 */
function summarize({ checked, patched, unmatched }) {
  if (checked === 0) {
    return "[postinstall] No microsub fetcher files found";
  }

  if (patched > 0) {
    return `[postinstall] Patched microsub HTML feed discovery in ${patched}/${checked} file(s)`;
  }

  // Files were inspected but none could be patched because the expected
  // snippet was missing: do not claim they are "already patched".
  if (unmatched > 0) {
    return `[postinstall] microsub HTML feed discovery not applied: target snippet missing in ${unmatched}/${checked} file(s)`;
  }

  return "[postinstall] microsub HTML feed discovery already patched";
}

// NOTE(review): oldSnippet must match the installed fetcher.js byte-for-byte,
// including indentation and line breaks — confirm against the upstream file.
// newSnippet must keep containing `marker`, otherwise the script would try to
// re-patch an already patched file on every run.
const patchSpecs = [
  {
    name: "microsub-html-feed-discovery",
    marker: "link-based discovery from HTML",
    candidates: [
      "node_modules/@rmdes/indiekit-endpoint-microsub/lib/feeds/fetcher.js",
      "node_modules/@indiekit/indiekit/node_modules/@rmdes/indiekit-endpoint-microsub/lib/feeds/fetcher.js",
    ],
    oldSnippet: `  // If we got ActivityPub or unknown, try common feed paths
  if (feedType === "activitypub" || feedType === "unknown") {
    const fallbackFeed = await tryCommonFeedPaths(url, options);
    if (fallbackFeed) {
      // Fetch and parse the discovered feed
      const feedResult = await fetchFeed(fallbackFeed.url, options);
      if (!feedResult.notModified) {
        const fallbackType = detectFeedType(feedResult.content, feedResult.contentType);
        const parsed = await parseFeed(feedResult.content, fallbackFeed.url, {
          contentType: feedResult.contentType,
        });
        return {
          ...feedResult,
          ...parsed,
          feedType: fallbackType,
          hub: feedResult.hub || parsed._hub,
          discoveredFrom: url,
        };
      }
    }
    throw new Error(
      \`Unable to find a feed at \${url}. Try the direct feed URL.\`,
    );
  }`,
    newSnippet: `  // If we got ActivityPub or unknown, try link-based discovery then common paths
  if (feedType === "activitypub" || feedType === "unknown") {
    // 1. link-based discovery from HTML: parse <link rel="alternate"> tags
    let discoveredFeedUrl;
    if (result.content) {
      const { discoverFeeds } = await import("./hfeed.js");
      const discovered = await discoverFeeds(result.content, url);
      const rssOrAtom = discovered.find(
        (f) => f.type === "rss" || f.type === "atom" || f.type === "jsonfeed",
      );
      if (rssOrAtom) discoveredFeedUrl = rssOrAtom.url;
    }

    // 2. Fall back to common feed paths (/feed, /rss.xml, etc.)
    const fallbackFeed = discoveredFeedUrl
      ? { url: discoveredFeedUrl }
      : await tryCommonFeedPaths(url, options);

    if (fallbackFeed) {
      // Fetch and parse the discovered feed
      const feedResult = await fetchFeed(fallbackFeed.url, options);
      if (!feedResult.notModified) {
        const fallbackType = detectFeedType(feedResult.content, feedResult.contentType);
        const parsed = await parseFeed(feedResult.content, fallbackFeed.url, {
          contentType: feedResult.contentType,
        });
        return {
          ...feedResult,
          ...parsed,
          feedType: fallbackType,
          hub: feedResult.hub || parsed._hub,
          discoveredFrom: url,
        };
      }
    }
    throw new Error(
      \`Unable to find a feed at \${url}. Try the direct feed URL.\`,
    );
  }`,
  },
];

// Tallies across every spec and candidate file, used for the final summary.
const counts = { checked: 0, patched: 0, unmatched: 0 };

for (const spec of patchSpecs) {
  let foundAnyTarget = false;

  for (const filePath of spec.candidates) {
    if (!(await exists(filePath))) {
      continue;
    }

    foundAnyTarget = true;
    counts.checked += 1;

    const source = await readFile(filePath, "utf8");
    const { status, updated } = applyPatch(source, spec);

    if (status === "already-patched") {
      console.log(`[postinstall] ${spec.name}: already patched, skipping`);
      continue;
    }

    if (status === "unmatched") {
      counts.unmatched += 1;
      console.warn(
        `[postinstall] ${spec.name}: target snippet not found in ${filePath} — may have been updated upstream`,
      );
      continue;
    }

    await writeFile(filePath, updated, "utf8");
    counts.patched += 1;
    console.log(`[postinstall] ${spec.name}: patched ${filePath}`);
  }

  if (!foundAnyTarget) {
    console.log(`[postinstall] ${spec.name}: no target files found`);
  }
}

// Surface "nothing could be patched" as a warning; everything else is
// informational. The process still exits normally so the postinstall/serve
// command chain keeps running.
const summary = summarize(counts);
if (counts.patched === 0 && counts.unmatched > 0) {
  console.warn(summary);
} else {
  console.log(summary);
}