Files
indiekit-endpoint-microsub/lib/feeds/validator.js
Ricardo ab6f81bf72 feat: Phase 1 - Enhanced feed discovery with validation
- Add validator.js: validateFeedUrl with comments feed detection
- Add discovery.js: discoverAndValidateFeeds with type labels
- Add opml.js: OPML 2.0 export of all subscriptions
- Update reader.js: searchFeeds uses validation, subscribe validates
- Update feeds.js: updateFeedStatus for health tracking
- Update search.njk: Show feed types, validation status, error messages
- Add CSS for badges, notices, and invalid feed styling
- Register OPML export route at /reader/opml

Phase 1 of blogroll implementation plan.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-07 01:39:58 +01:00

129 lines
3.1 KiB
JavaScript

/**
* Feed validation utilities
* @module feeds/validator
*/
import { fetchFeed } from "./fetcher.js";
import { detectFeedType } from "./parser.js";
/**
* Feed types that are valid subscriptions
*/
const VALID_FEED_TYPES = ["rss", "atom", "jsonfeed", "hfeed"];
/**
* Patterns that indicate a comments feed (not a main feed)
*/
const COMMENTS_PATTERNS = [
/\/comments\/?$/i,
/\/feed\/comments/i,
/commentsfeed/i,
/comment-feed/i,
/-comments\.xml$/i,
/\/replies\/?$/i,
/comments\.rss$/i,
/comments\.atom$/i,
];
/**
* Validate a URL is actually a feed
* @param {string} url - URL to validate
* @returns {Promise<object>} Validation result
*/
export async function validateFeedUrl(url) {
try {
const result = await fetchFeed(url, { timeout: 15000 });
if (result.notModified || !result.content) {
return {
valid: false,
error: "Unable to fetch content from URL",
};
}
const feedType = detectFeedType(result.content, result.contentType);
if (feedType === "activitypub") {
return {
valid: false,
error:
"URL returns ActivityPub JSON instead of a feed. Try the direct feed URL.",
feedType,
};
}
if (!VALID_FEED_TYPES.includes(feedType)) {
return {
valid: false,
error: `URL does not contain a valid feed (detected: ${feedType})`,
feedType,
};
}
// Check if it's a comments feed
const isCommentsFeed = COMMENTS_PATTERNS.some((pattern) =>
pattern.test(url),
);
return {
valid: true,
feedType,
isCommentsFeed,
title: extractFeedTitle(result.content, feedType),
contentType: result.contentType,
};
} catch (error) {
return {
valid: false,
error: error.message,
};
}
}
/**
* Extract feed title from content
* @param {string} content - Feed content
* @param {string} feedType - Type of feed
* @returns {string|undefined} Feed title
*/
function extractFeedTitle(content, feedType) {
if (feedType === "jsonfeed") {
try {
const json = JSON.parse(content);
return json.title;
} catch {
return undefined;
}
}
// Extract title from XML (RSS or Atom)
// Try channel/title first (RSS), then just title (Atom)
const channelTitleMatch = content.match(
/<channel[^>]*>[\s\S]*?<title[^>]*>([^<]+)<\/title>/i,
);
if (channelTitleMatch) {
return decodeXmlEntities(channelTitleMatch[1].trim());
}
const titleMatch = content.match(/<title[^>]*>([^<]+)<\/title>/i);
return titleMatch ? decodeXmlEntities(titleMatch[1].trim()) : undefined;
}
/**
* Decode XML entities
* @param {string} str - String with XML entities
* @returns {string} Decoded string
*/
function decodeXmlEntities(str) {
return str
.replace(/&amp;/g, "&")
.replace(/&lt;/g, "<")
.replace(/&gt;/g, ">")
.replace(/&quot;/g, '"')
.replace(/&apos;/g, "'")
.replace(/&#(\d+);/g, (_, code) => String.fromCharCode(parseInt(code, 10)))
.replace(/&#x([0-9a-fA-F]+);/g, (_, code) =>
String.fromCharCode(parseInt(code, 16)),
);
}