feat: Phase 1 - Enhanced feed discovery with validation

- Add validator.js: validateFeedUrl with comments feed detection
- Add discovery.js: discoverAndValidateFeeds with type labels
- Add opml.js: OPML 2.0 export of all subscriptions
- Update reader.js: searchFeeds uses validation, subscribe validates
- Update feeds.js: updateFeedStatus for health tracking
- Update search.njk: Show feed types, validation status, error messages
- Add CSS for badges, notices, and invalid feed styling
- Register OPML export route at /reader/opml

Phase 1 of blogroll implementation plan.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Ricardo
2026-02-07 01:39:58 +01:00
parent 6caf37a003
commit ab6f81bf72
9 changed files with 609 additions and 10 deletions

View File

@@ -763,3 +763,108 @@
width: 100%;
}
}
/* ==========================================================================
Badges (for feed types, validation status)
========================================================================== */
/* Base pill shared by every badge variant; radius, font size and horizontal
padding come from the theme's custom properties. */
.badge {
border-radius: var(--border-radius);
display: inline-block;
font-size: var(--font-size-small);
font-weight: 500;
padding: 2px var(--space-xs);
vertical-align: middle;
}
/* Informational variant: primary colour behind background-coloured text. */
.badge--info {
background: var(--color-primary);
color: var(--color-background);
}
/* The second argument of var() is a fallback that applies only when the
custom property is not defined; the text colours below are fixed. */
.badge--warning {
background: var(--color-warning, #ffcc00);
color: #000;
}
.badge--error {
background: var(--color-error, #ff4444);
color: #fff;
}
.badge--success {
background: var(--color-success, #22c55e);
color: #fff;
}
/* ==========================================================================
Search Enhancements (feed validation)
========================================================================== */
/* Feed title line; the type badges are rendered inside this element. */
.search__name {
display: block;
font-weight: 600;
margin-bottom: var(--space-xs);
}
/* Gap between the title text and each badge that follows it. */
.search__type {
margin-left: var(--space-xs);
}
/* Validation error message shown under the URL of an invalid result. */
.search__error {
color: var(--color-error, #ff4444);
display: block;
font-size: var(--font-size-small);
margin-top: var(--space-xs);
}
/* Dim results that failed validation. */
.search__item--invalid {
opacity: 0.7;
}
/* Accent stripe marking results detected as comments feeds. */
.search__item--comments {
border-left: 3px solid var(--color-warning, #ffcc00);
}
/* Label rendered in place of the subscribe form for an invalid result. */
.search__invalid-badge {
background: var(--color-error, #ff4444);
border-radius: var(--border-radius);
color: #fff;
font-size: var(--font-size-small);
font-weight: 500;
padding: var(--space-xs) var(--space-s);
}
/* Lay the subscribe form's controls out in one vertically centred row. */
.search__subscribe {
align-items: center;
display: flex;
gap: var(--space-s);
}
/* ==========================================================================
Notices (errors, warnings)
========================================================================== */
/* Base box shared by every notice variant. */
.notice {
border-radius: var(--border-radius);
margin-bottom: var(--space-m);
padding: var(--space-m);
}
/* Error tint: --color-error-rgb is read as comma-separated channels, and the
fallback 255, 68, 68 is the same colour as the #ff4444 fallback below. */
.notice--error {
background: rgba(var(--color-error-rgb, 255, 68, 68), 0.1);
border: 1px solid var(--color-error, #ff4444);
color: var(--color-error, #ff4444);
}
/* The tint is hard-coded to the RGB value of the #ffcc00 fallback, so it does
not follow a theme-defined --color-warning; the text colour is fixed too. */
.notice--warning {
background: rgba(255, 204, 0, 0.1);
border: 1px solid var(--color-warning, #ffcc00);
color: #856404;
}
/* The tint is hard-coded to the RGB value of the #22c55e fallback, so it does
not follow a theme-defined --color-success. */
.notice--success {
background: rgba(34, 197, 94, 0.1);
border: 1px solid var(--color-success, #22c55e);
color: var(--color-success, #22c55e);
}

View File

@@ -3,6 +3,7 @@ import path from "node:path";
import express from "express";
import { microsubController } from "./lib/controllers/microsub.js";
import { opmlController } from "./lib/controllers/opml.js";
import { readerController } from "./lib/controllers/reader.js";
import { handleMediaProxy } from "./lib/media/proxy.js";
import { startScheduler, stopScheduler } from "./lib/polling/scheduler.js";
@@ -97,6 +98,7 @@ export default class MicrosubEndpoint {
readerRouter.post("/search", readerController.searchFeeds);
readerRouter.post("/subscribe", readerController.subscribe);
readerRouter.post("/api/mark-read", readerController.markAllRead);
readerRouter.get("/opml", opmlController.exportOpml);
router.use("/reader", readerRouter);
return router;

151
lib/controllers/opml.js Normal file
View File

@@ -0,0 +1,151 @@
/**
* OPML export controller
* @module controllers/opml
*/
import { getChannels } from "../storage/channels.js";
import { getFeedsForChannel } from "../storage/feeds.js";
import { getUserId } from "../utils/auth.js";
/**
 * Send the current user's subscriptions as a downloadable OPML document
 * GET /opml
 *
 * Every channel that has at least one feed becomes a folder outline whose
 * children are that channel's feeds; channels without feeds are left out.
 * @param {object} request - Express request
 * @param {object} response - Express response
 * @returns {Promise<void>}
 */
async function exportOpml(request, response) {
  const { application } = request.app.locals;
  const userId = getUserId(request);

  // Describe one stored feed as an OPML feed outline. Every feed is labelled
  // type "rss"; a feed without a title is shown under its host name.
  const toFeedOutline = (feed) => ({
    text: feed.title || extractDomain(feed.url),
    title: feed.title || "",
    type: "rss",
    xmlUrl: feed.url,
    htmlUrl: deriveSiteUrl(feed.url),
  });

  const outlines = [];
  for (const channel of await getChannels(application, userId)) {
    const feeds = await getFeedsForChannel(application, channel._id);
    if (feeds.length !== 0) {
      outlines.push({
        text: channel.name,
        title: channel.name,
        children: feeds.map((feed) => toFeedOutline(feed)),
      });
    }
  }

  // The document title is derived from the publication URL, with a
  // placeholder host when none is configured
  const siteName = extractDomain(
    application.publication?.me || "https://example.com",
  );
  const document = generateOpmlXml({
    title: `${siteName} - Microsub Subscriptions`,
    dateCreated: new Date().toUTCString(),
    ownerName: userId,
    outlines,
  });

  // Served as an attachment so browsers download rather than display it
  response.set("Content-Type", "text/x-opml");
  response.set(
    "Content-Disposition",
    'attachment; filename="subscriptions.opml"',
  );
  response.send(document);
}
/**
 * Serialise outline data into an OPML 2.0 document
 *
 * An outline with a `children` array is written as a folder element that
 * wraps its children; any other outline is written as a self-closing feed
 * element. Text, title, URLs, document title and owner name are passed
 * through escapeXml; `dateCreated` and an outline's `type` are inserted as-is.
 * @param {object} data - OPML data
 * @param {string} data.title - Document title
 * @param {string} data.dateCreated - Creation date
 * @param {string} data.ownerName - Owner name
 * @param {Array} data.outlines - Outline items
 * @returns {string} OPML XML string
 */
function generateOpmlXml({ title, dateCreated, ownerName, outlines }) {
  // Render one outline; nested levels are indented one extra space each
  function renderOutline(outline, indent = " ") {
    const labels = `text="${escapeXml(outline.text)}" title="${escapeXml(outline.title)}"`;

    if (!outline.children) {
      return `${indent}<outline ${labels} type="${outline.type}" xmlUrl="${escapeXml(outline.xmlUrl)}" htmlUrl="${escapeXml(outline.htmlUrl)}"/>`;
    }

    const nestedXml = outline.children
      .map((child) => renderOutline(child, indent + " "))
      .join("\n");
    return `${indent}<outline ${labels}>\n${nestedXml}\n${indent}</outline>`;
  }

  const outlinesXml = outlines
    .map((outline) => renderOutline(outline))
    .join("\n");

  return `<?xml version="1.0" encoding="UTF-8"?>
<opml version="2.0">
<head>
<title>${escapeXml(title)}</title>
<dateCreated>${dateCreated}</dateCreated>
<ownerName>${escapeXml(ownerName)}</ownerName>
</head>
<body>
${outlinesXml}
</body>
</opml>`;
}
/**
 * Escape the five XML special characters (& < > " ') so a value can be
 * embedded in element text or in a quoted attribute
 * @param {*} str - Value to escape; non-strings are converted with String()
 * @returns {string} Escaped string, or "" when the value is null or undefined
 */
function escapeXml(str) {
  // Only a missing value collapses to "". A falsy but real value such as 0
  // must still be rendered, which a plain truthiness guard would drop.
  if (str === null || str === undefined) return "";

  const replacements = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&apos;",
  };

  // Single pass: an ampersand introduced by one replacement is never
  // escaped a second time
  return String(str).replace(/[&<>"']/g, (char) => replacements[char]);
}
/**
 * Return the host name of a URL
 * @param {string} url - URL to read the host name from
 * @returns {string} Host name, or the input unchanged when it cannot be
 * parsed as a URL
 */
function extractDomain(url) {
  let domain = url;
  try {
    ({ hostname: domain } = new URL(url));
  } catch {
    // Not a parseable URL: keep the raw input as the result
  }
  return domain;
}
/**
 * Guess a site's URL from one of its feed URLs
 *
 * Common feed endings are stripped from the path, in the listed order, and
 * the result is appended to the URL's origin. Query string and fragment are
 * not carried over.
 * @param {string} feedUrl - Feed URL
 * @returns {string} Site URL, or the input unchanged when it cannot be
 * parsed as a URL
 */
function deriveSiteUrl(feedUrl) {
  const feedEndings = [
    /\/feed\/?$/,
    /\/rss\/?$/,
    /\/atom\.xml$/,
    /\/rss\.xml$/,
    /\/feed\.xml$/,
    /\/index\.xml$/,
    /\.rss$/,
    /\.atom$/,
  ];

  let parsed;
  try {
    parsed = new URL(feedUrl);
  } catch {
    return feedUrl;
  }

  let sitePath = parsed.pathname;
  for (const ending of feedEndings) {
    sitePath = sitePath.replace(ending, "");
  }

  // An emptied path means the feed lived at the site root
  return `${parsed.origin}${sitePath || "/"}`;
}
// Controller object exposing the OPML route handler (exportOpml)
export const opmlController = { exportOpml };

View File

@@ -3,7 +3,8 @@
* @module controllers/reader
*/
import { discoverFeedsFromUrl } from "../feeds/fetcher.js";
import { discoverAndValidateFeeds } from "../feeds/discovery.js";
import { validateFeedUrl } from "../feeds/validator.js";
import { refreshFeedNow } from "../polling/scheduler.js";
import {
getChannels,
@@ -585,7 +586,7 @@ export async function searchPage(request, response) {
}
/**
* Search for feeds from URL
* Search for feeds from URL - enhanced with validation
* @param {object} request - Express request
* @param {object} response - Express response
* @returns {Promise<void>}
@@ -598,11 +599,14 @@ export async function searchFeeds(request, response) {
const channelList = await getChannels(application, userId);
let results = [];
let discoveryError = null;
if (query) {
try {
results = await discoverFeedsFromUrl(query);
} catch {
// Ignore discovery errors
// Use enhanced discovery with validation
results = await discoverAndValidateFeeds(query);
} catch (error) {
discoveryError = error.message;
}
}
@@ -611,13 +615,14 @@ export async function searchFeeds(request, response) {
channels: channelList,
query,
results,
discoveryError,
searched: true,
baseUrl: request.baseUrl,
});
}
/**
* Subscribe to a feed from search results
* Subscribe to a feed from search results - with validation
* @param {object} request - Express request
* @param {object} response - Express response
* @returns {Promise<void>}
@@ -625,13 +630,34 @@ export async function searchFeeds(request, response) {
export async function subscribe(request, response) {
const { application } = request.app.locals;
const userId = getUserId(request);
const { url, channel: channelUid } = request.body;
const { url, channel: channelUid, skipValidation } = request.body;
const channelDocument = await getChannel(application, channelUid, userId);
if (!channelDocument) {
return response.status(404).render("404");
}
// Validate feed unless explicitly skipped (for power users)
if (!skipValidation) {
const validation = await validateFeedUrl(url);
if (!validation.valid) {
const channelList = await getChannels(application, userId);
return response.render("search", {
title: request.__("microsub.search.title"),
channels: channelList,
query: url,
validationError: validation.error,
baseUrl: request.baseUrl,
});
}
// Warn about comments feeds but allow subscription
if (validation.isCommentsFeed) {
console.warn(`[Microsub] Subscribing to comments feed: ${url}`);
}
}
// Create feed subscription
const feed = await createFeed(application, {
channelId: channelDocument._id,

95
lib/feeds/discovery.js Normal file
View File

@@ -0,0 +1,95 @@
/**
* Enhanced feed discovery with type labels and validation
* @module feeds/discovery
*/
import { discoverFeedsFromUrl } from "./fetcher.js";
import { validateFeedUrl } from "./validator.js";
/**
 * Human-readable display labels, keyed by feed type identifier.
 * discoverAndValidateFeeds looks a label up first by the validated feed type
 * and then by the discovered type, and uses "Feed" when neither key exists.
 */
const FEED_TYPE_LABELS = {
rss: "RSS Feed",
atom: "Atom Feed",
jsonfeed: "JSON Feed",
hfeed: "h-feed (Microformats)",
activitypub: "ActivityPub",
unknown: "Unknown",
};
/**
 * Discover the feeds advertised at a URL and validate each one
 *
 * When discovery finds nothing, a single invalid placeholder entry is
 * returned so the caller still has an error to display. Otherwise every
 * discovered feed is validated concurrently and the list is ordered: valid
 * before invalid, main feeds before comments feeds, then by URL.
 * @param {string} url - Page or feed URL
 * @returns {Promise<Array>} Array of discovered feeds with validation status
 */
export async function discoverAndValidateFeeds(url) {
  const discovered = await discoverFeedsFromUrl(url);

  if (discovered.length === 0) {
    const nothingFound = {
      url,
      type: "unknown",
      typeLabel: "No feed found",
      valid: false,
      error: "No feeds were discovered at this URL",
      isCommentsFeed: false,
    };
    return [nothingFound];
  }

  // Merge what discovery reported with what validation found; validation
  // wins for type and title when it provides a value
  const describeFeed = async (feed) => {
    const check = await validateFeedUrl(feed.url);
    const typeLabel =
      FEED_TYPE_LABELS[check.feedType] || FEED_TYPE_LABELS[feed.type] || "Feed";
    return {
      url: feed.url,
      type: check.feedType || feed.type,
      typeLabel,
      valid: check.valid,
      error: check.error,
      isCommentsFeed: check.isCommentsFeed || false,
      title: check.title || feed.title,
      rel: feed.rel,
    };
  };

  function byUsefulness(first, second) {
    if (first.valid !== second.valid) {
      return first.valid ? -1 : 1;
    }
    if (first.isCommentsFeed !== second.isCommentsFeed) {
      return first.isCommentsFeed ? 1 : -1;
    }
    return first.url.localeCompare(second.url);
  }

  const described = await Promise.all(
    discovered.map((feed) => describeFeed(feed)),
  );
  return described.sort(byUsefulness);
}
/**
 * Keep only the feeds that are valid and are not comments feeds
 * @param {Array} feeds - Array of feed objects
 * @returns {Array} New array holding the main content feeds, in input order
 */
export function filterMainFeeds(feeds) {
  const isMainFeed = (feed) => !(!feed.valid || feed.isCommentsFeed);
  return feeds.filter((feed) => isMainFeed(feed));
}
/**
 * Pick the preferred feed from a list: the first entry that filterMainFeeds
 * keeps (valid and not a comments feed)
 * @param {Array} feeds - Array of feed objects
 * @returns {object|undefined} Best feed, or undefined when no entry qualifies
 */
export function getBestFeed(feeds) {
  const [best] = filterMainFeeds(feeds);
  return best;
}
export { FEED_TYPE_LABELS };

128
lib/feeds/validator.js Normal file
View File

@@ -0,0 +1,128 @@
/**
* Feed validation utilities
* @module feeds/validator
*/
import { fetchFeed } from "./fetcher.js";
import { detectFeedType } from "./parser.js";
/**
 * Feed types that are valid subscriptions.
 * validateFeedUrl rejects any detected type that is not in this list.
 */
const VALID_FEED_TYPES = ["rss", "atom", "jsonfeed", "hfeed"];
/**
 * URL patterns that indicate a comments feed (not a main content feed).
 * A URL is treated as a comments feed when any one pattern matches it.
 * None of the patterns use the `g` flag, so `.test()` keeps no state
 * between calls.
 */
const COMMENTS_PATTERNS = [
  /\/comments\/?$/i,
  /\/feed\/comments/i,
  // WordPress site-wide comments feed with pretty permalinks
  /\/comments\/feed\/?$/i,
  // WordPress comments feed with plain permalinks, e.g. ?feed=comments-rss2
  /[?&]feed=comments-/i,
  /commentsfeed/i,
  /comment-feed/i,
  /-comments\.xml$/i,
  /\/replies\/?$/i,
  /comments\.rss$/i,
  /comments\.atom$/i,
];
/**
 * Fetch a URL and decide whether it is a feed that can be subscribed to
 *
 * Never rejects: any error thrown while fetching or inspecting the content
 * is reported as `{ valid: false, error }`.
 * @param {string} url - URL to validate
 * @returns {Promise<object>} Validation result. On success:
 * `{ valid: true, feedType, isCommentsFeed, title, contentType }`. On
 * failure: `{ valid: false, error }`, plus `feedType` when a type was
 * detected but is not accepted.
 */
export async function validateFeedUrl(url) {
  // Build a failure result; `details` carries the detected type when known
  const reject = (error, details = {}) => ({ valid: false, error, ...details });

  try {
    const fetched = await fetchFeed(url, { timeout: 15000 });
    if (fetched.notModified || !fetched.content) {
      return reject("Unable to fetch content from URL");
    }

    const feedType = detectFeedType(fetched.content, fetched.contentType);

    // ActivityPub gets its own message because it is a recognised format
    // that simply is not a feed
    if (feedType === "activitypub") {
      return reject(
        "URL returns ActivityPub JSON instead of a feed. Try the direct feed URL.",
        { feedType },
      );
    }

    if (!VALID_FEED_TYPES.includes(feedType)) {
      return reject(
        `URL does not contain a valid feed (detected: ${feedType})`,
        { feedType },
      );
    }

    // Comments feeds are recognised from the URL alone, not from content
    const isCommentsFeed = COMMENTS_PATTERNS.some((pattern) =>
      pattern.test(url),
    );
    const title = extractFeedTitle(fetched.content, feedType);

    return {
      valid: true,
      feedType,
      isCommentsFeed,
      title,
      contentType: fetched.contentType,
    };
  } catch (error) {
    return reject(error.message);
  }
}
/**
 * Extract the feed-level title from raw feed content
 *
 * JSON Feed: the top-level `title` property, or undefined when the content
 * is not parseable JSON.
 * XML (RSS/Atom): the first `<title>` at or after `<channel>` when the
 * document has one (RSS), otherwise the first `<title>` in the document
 * (Atom). Both plain-text and CDATA-wrapped titles are understood; without
 * CDATA support a wrapped channel title would be skipped and the first
 * item's title reported as the feed title instead.
 * @param {string} content - Feed content
 * @param {string} feedType - Type of feed
 * @returns {string|undefined} Feed title, or undefined when none is found
 * or the title is empty
 */
function extractFeedTitle(content, feedType) {
  if (feedType === "jsonfeed") {
    try {
      const json = JSON.parse(content);
      return json.title;
    } catch {
      return undefined;
    }
  }

  // Group 1: CDATA payload. Group 2: plain character data.
  const titlePattern =
    /<title(?:\s[^>]*)?>\s*(?:<!\[CDATA\[([\s\S]*?)\]\]>|([^<]*?))\s*<\/title>/i;

  // Prefer the title inside <channel> (RSS); fall back to the whole document
  // when there is no channel or no title can be read after it
  const channelStart = content.search(/<channel[\s>]/i);
  const match =
    (channelStart === -1
      ? null
      : titlePattern.exec(content.slice(channelStart))) ??
    titlePattern.exec(content);
  if (!match) return undefined;

  const [, cdataText, plainText] = match;
  // CDATA content is literal text, so entities are only decoded for plain
  // character data
  const title =
    cdataText === undefined
      ? decodeXmlEntities(plainText.trim())
      : cdataText.trim();

  return title || undefined;
}
/**
 * Decode the predefined XML entities (&amp; &lt; &gt; &quot; &apos;) and
 * decimal/hexadecimal character references
 *
 * All replacements happen in one pass, so text produced by decoding is never
 * decoded again: "&amp;lt;" becomes "&lt;", not "<". Character references
 * are resolved as full Unicode code points, so references above U+FFFF (for
 * example emoji) decode correctly. A reference beyond U+10FFFF is left as
 * written.
 * @param {string} str - String with XML entities
 * @returns {string} Decoded string
 */
function decodeXmlEntities(str) {
  const namedEntities = {
    amp: "&",
    lt: "<",
    gt: ">",
    quot: '"',
    apos: "'",
  };

  return str.replace(
    /&(?:(amp|lt|gt|quot|apos)|#(\d+)|#x([0-9a-fA-F]+));/g,
    (reference, name, decimal, hex) => {
      if (name !== undefined) return namedEntities[name];

      const codePoint =
        decimal === undefined
          ? Number.parseInt(hex, 16)
          : Number.parseInt(decimal, 10);

      // String.fromCodePoint throws outside the Unicode range
      return codePoint <= 0x10ffff
        ? String.fromCodePoint(codePoint)
        : reference;
    },
  );
}

View File

@@ -297,3 +297,68 @@ export async function updateFeedWebsub(application, id, websub) {
export async function getFeedBySubscriptionId(application, subscriptionId) {
return getFeedById(application, subscriptionId);
}
/**
 * Update feed status after processing
 * Tracks health status, errors, and success metrics
 *
 * On success the feed is marked "active", `consecutiveErrors` is reset to 0
 * and the stored error fields are removed with `$unset`. Setting them to
 * `undefined` through `$set` would either store null or be skipped,
 * depending on the client's `ignoreUndefined` setting, and in the latter
 * case a stale error would survive a successful fetch.
 * On failure the feed is marked "error", the error is recorded and
 * `consecutiveErrors` is incremented by one.
 * @param {object} application - Indiekit application
 * @param {ObjectId|string} id - Feed ObjectId
 * @param {object} status - Status update
 * @param {boolean} status.success - Whether fetch was successful
 * @param {string} [status.error] - Error message if failed
 * @param {number} [status.itemCount] - Number of items in feed
 * @returns {Promise<object|null>} Result of `findOneAndUpdate` called with
 * `returnDocument: "after"`
 */
export async function updateFeedStatus(application, id, status) {
  const collection = getCollection(application);
  const objectId = typeof id === "string" ? new ObjectId(id) : id;
  const now = new Date();

  let updateOp;
  if (status.success) {
    const fields = {
      updatedAt: now,
      status: "active",
      lastSuccessAt: now,
      consecutiveErrors: 0,
    };
    if (status.itemCount !== undefined) {
      fields.itemCount = status.itemCount;
    }
    updateOp = {
      $set: fields,
      // Remove the error fields outright rather than $set-ing undefined
      $unset: { lastError: "", lastErrorAt: "" },
    };
  } else {
    updateOp = {
      $set: {
        updatedAt: now,
        status: "error",
        // An explicit null overwrites any older message when none is given
        lastError: status.error ?? null,
        lastErrorAt: now,
      },
      // consecutiveErrors is only ever touched by one operator per update
      $inc: { consecutiveErrors: 1 },
    };
  }

  return collection.findOneAndUpdate({ _id: objectId }, updateOp, {
    returnDocument: "after",
  });
}
/**
 * List the feeds that are currently failing
 *
 * A feed qualifies when its status is "error" and its consecutive error
 * count has reached the given threshold.
 * @param {object} application - Indiekit application
 * @param {number} [minErrors=3] - Minimum consecutive errors
 * @returns {Promise<Array>} Array of feeds with errors
 */
export async function getFeedsWithErrors(application, minErrors = 3) {
  const failingFeeds = {
    status: "error",
    consecutiveErrors: { $gte: minErrors },
  };
  const cursor = getCollection(application).find(failingFeeds);
  return cursor.toArray();
}

View File

@@ -1,6 +1,6 @@
{
"name": "@rmdes/indiekit-endpoint-microsub",
"version": "1.0.22",
"version": "1.0.23",
"description": "Microsub endpoint for Indiekit. Enables subscribing to feeds and reading content using the Microsub protocol.",
"keywords": [
"indiekit",

View File

@@ -25,16 +25,40 @@
</div>
</form>
{% if validationError %}
<div class="notice notice--error">
<p>{{ validationError }}</p>
</div>
{% endif %}
{% if discoveryError %}
<div class="notice notice--error">
<p>{{ discoveryError }}</p>
</div>
{% endif %}
{% if results and results.length > 0 %}
<div class="search__results">
<h3>{{ __("microsub.search.title") }}</h3>
<div class="search__list">
{% for result in results %}
<div class="search__item">
<div class="search__item{% if not result.valid %} search__item--invalid{% endif %}{% if result.isCommentsFeed %} search__item--comments{% endif %}">
<div class="search__feed">
<span class="search__name">{{ result.title or "Feed" }}</span>
<span class="search__name">
{{ result.title or "Feed" }}
<span class="search__type badge badge--{% if result.valid %}info{% else %}warning{% endif %}">
{{ result.typeLabel }}
</span>
{% if result.isCommentsFeed %}
<span class="search__type badge badge--warning">Comments</span>
{% endif %}
</span>
<span class="search__url">{{ result.url | replace("https://", "") | replace("http://", "") }}</span>
{% if not result.valid %}
<span class="search__error">{{ result.error }}</span>
{% endif %}
</div>
{% if result.valid %}
<form method="post" action="{{ baseUrl }}/subscribe" class="search__subscribe">
<input type="hidden" name="url" value="{{ result.url }}">
<label for="channel-{{ loop.index }}" class="visually-hidden">{{ __("microsub.channels.title") }}</label>
@@ -48,6 +72,9 @@
classes: "button--small"
}) }}
</form>
{% else %}
<span class="search__invalid-badge">Invalid</span>
{% endif %}
</div>
{% endfor %}
</div>