diff --git a/index.js b/index.js
index b82a66d..bb9d367 100644
--- a/index.js
+++ b/index.js
@@ -86,6 +86,9 @@ export default class BlogrollEndpoint {
     protectedRouter.post("/blogs/:id/delete", blogsController.remove);
     protectedRouter.post("/blogs/:id/refresh", blogsController.refresh);
 
+    // Feed discovery (protected to prevent abuse)
+    protectedRouter.get("/api/discover", apiController.discover);
+
     return protectedRouter;
   }
 
diff --git a/lib/controllers/api.js b/lib/controllers/api.js
index 4465ec1..165148e 100644
--- a/lib/controllers/api.js
+++ b/lib/controllers/api.js
@@ -8,6 +8,7 @@ import { getBlogs, countBlogs, getBlog, getCategories } from "../storage/blogs.j
 import { getItems, getItemsForBlog } from "../storage/items.js";
 import { getSyncStatus } from "../sync/scheduler.js";
 import { generateOpml } from "../sync/opml.js";
+import { discoverFeeds } from "../utils/feed-discovery.js";
 
 /**
  * List blogs with optional filtering
@@ -185,6 +186,34 @@ async function exportOpmlCategory(request, response) {
   }
 }
 
+/**
+ * Discover feeds from a website URL
+ * GET /api/discover?url=...
+ *
+ * Responds 400 unless `url` is a single non-blank string. Otherwise replies
+ * with the discoverFeeds() result, which reports fetch failures itself via
+ * `success: false`; 500 is reserved for unexpected errors.
+ * @param {object} request - Incoming request; `request.query.url` is read
+ * @param {object} response - Response used to send the JSON reply
+ */
+async function discover(request, response) {
+  const { url } = request.query;
+
+  // A query parser can hand back an array or object for repeated or nested
+  // keys; only a plain string can be a URL
+  if (typeof url !== "string" || url.trim() === "") {
+    return response.status(400).json({ error: "URL parameter required" });
+  }
+
+  try {
+    const result = await discoverFeeds(url);
+    response.json(result);
+  } catch (error) {
+    console.error("[Blogroll API] discover error:", error);
+    response.status(500).json({ error: "Failed to discover feeds" });
+  }
+}
+
 // Helper functions
 
 /**
@@ -237,4 +266,5 @@ export const apiController = {
   status,
   exportOpml,
   exportOpmlCategory,
+  discover,
 };
diff --git a/lib/utils/feed-discovery.js b/lib/utils/feed-discovery.js
new file mode 100644
index 0000000..7d2cd52
--- /dev/null
+++ b/lib/utils/feed-discovery.js
@@ -0,0 +1,230 @@
+/**
+ * RSS/Atom feed discovery from website URLs
+ * @module utils/feed-discovery
+ */
+
+const USER_AGENT = "Indiekit-Blogroll/1.0 (Feed Discovery)";
+
+// Media types accepted on <link rel="alternate"> tags. Compared exactly, so
+// look-alikes such as "application/json+oembed" are not mistaken for feeds.
+const FEED_TYPES = new Set([
+  "application/rss+xml",
+  "application/atom+xml",
+  "application/feed+json",
+  "application/json",
+  "application/rdf+xml",
+  "application/xml",
+  "text/xml",
+]);
+
+// Well-known feed locations probed when the page advertises no feed
+const COMMON_PATHS = [
+  "/feed",
+  "/feed.xml",
+  "/rss",
+  "/rss.xml",
+  "/atom.xml",
+  "/feed/atom",
+  "/feed/rss",
+  "/index.xml",
+  "/blog/feed",
+  "/blog/rss",
+  "/.rss",
+  "/feed.json",
+];
+
+const LINK_REGEX = /<link[^>]+rel=["']alternate["'][^>]*>/gi;
+const TYPE_REGEX = /type=["']([^"']+)["']/i;
+const HREF_REGEX = /href=["']([^"']+)["']/i;
+const TITLE_REGEX = /title=["']([^"']+)["']/i;
+
+/**
+ * Classify a media type or Content-Type value
+ * @param {string} mediaType - Lower-cased media type
+ * @returns {string} "atom", "json" or "rss"
+ */
+function feedKind(mediaType) {
+  if (mediaType.includes("atom")) {
+    return "atom";
+  }
+  return mediaType.includes("json") ? "json" : "rss";
+}
+
+/**
+ * Undo the `&amp;` escaping used in HTML attribute values and text
+ * @param {string} text - Raw text taken from the markup
+ * @returns {string} Text with `&amp;` replaced by `&`
+ */
+function decodeAmpersands(text) {
+  return text.replace(/&amp;/g, "&");
+}
+
+/**
+ * Collect feeds advertised through <link rel="alternate"> tags
+ * @param {string} html - Page markup
+ * @param {URL} baseUrl - URL that relative hrefs are resolved against
+ * @returns {Array<object>} Unique feeds as { url, type, title }
+ */
+function findLinkedFeeds(html, baseUrl) {
+  const feeds = [];
+  const seen = new Set();
+
+  // matchAll copies the regex, so the shared /g pattern keeps no state
+  for (const [linkTag] of html.matchAll(LINK_REGEX)) {
+    const typeMatch = TYPE_REGEX.exec(linkTag);
+    const hrefMatch = HREF_REGEX.exec(linkTag);
+    if (!typeMatch || !hrefMatch) {
+      continue;
+    }
+
+    // Drop parameters such as "; charset=utf-8" before comparing
+    const type = typeMatch[1].split(";")[0].trim().toLowerCase();
+    if (!FEED_TYPES.has(type)) {
+      continue;
+    }
+
+    let feedUrl;
+    try {
+      feedUrl = new URL(decodeAmpersands(hrefMatch[1]), baseUrl).href;
+    } catch {
+      // One malformed href must not fail the whole discovery
+      continue;
+    }
+
+    if (seen.has(feedUrl)) {
+      continue;
+    }
+    seen.add(feedUrl);
+
+    const titleMatch = TITLE_REGEX.exec(linkTag);
+    feeds.push({
+      url: feedUrl,
+      type: feedKind(type),
+      title: titleMatch ? decodeAmpersands(titleMatch[1]) : null,
+    });
+  }
+
+  return feeds;
+}
+
+/**
+ * Probe well-known feed paths with HEAD requests
+ * @param {URL} baseUrl - URL the paths are resolved against
+ * @param {AbortSignal} signal - Signal carrying the overall timeout
+ * @returns {Promise<Array<object>>} The first feed found, or an empty array
+ */
+async function probeCommonPaths(baseUrl, signal) {
+  for (const path of COMMON_PATHS) {
+    const feedUrl = new URL(path, baseUrl).href;
+
+    try {
+      const response = await fetch(feedUrl, {
+        method: "HEAD",
+        signal,
+        headers: { "User-Agent": USER_AGENT },
+      });
+      if (!response.ok) {
+        continue;
+      }
+
+      const contentType = (
+        response.headers.get("content-type") ?? ""
+      ).toLowerCase();
+      if (/xml|rss|atom|json/.test(contentType)) {
+        return [{ url: feedUrl, type: feedKind(contentType), title: null }];
+      }
+    } catch (error) {
+      // The timeout covers the whole discovery, so let it reach the caller;
+      // any other failure only rules out this one path
+      if (error.name === "AbortError") {
+        throw error;
+      }
+    }
+  }
+
+  return [];
+}
+
+/**
+ * Extract the page title for use as a blog name
+ * @param {string} html - Page markup
+ * @returns {string|null} Title without its tagline suffix, or null if absent
+ */
+function extractPageTitle(html) {
+  const match = /<title[^>]*>([^<]+)<\/title>/i.exec(html);
+  if (!match) {
+    return null;
+  }
+
+  // Separators need surrounding whitespace so hyphenated names stay intact
+  return decodeAmpersands(match[1])
+    .trim()
+    .replace(/\s+[-|–—]\s+.*$/, "")
+    .replace(/\s*:\s*Home.*$/i, "")
+    .trim();
+}
+
+/**
+ * Discover RSS/Atom feeds from a website URL
+ *
+ * Feeds advertised in the page markup are preferred; well-known feed paths
+ * are probed only when the markup lists none. Failures are reported through
+ * the result object rather than thrown.
+ * @param {string} websiteUrl - The website URL to check (https:// is assumed
+ *   when no http(s) scheme is given)
+ * @param {number} timeout - Overall time budget in ms for all requests
+ * @returns {Promise<object>} On success { success: true, feeds, pageTitle,
+ *   siteUrl }; on failure { success: false, error, feeds: [] }
+ */
+export async function discoverFeeds(websiteUrl, timeout = 10000) {
+  if (typeof websiteUrl !== "string" || websiteUrl.trim() === "") {
+    return { success: false, error: "URL is required", feeds: [] };
+  }
+
+  const controller = new AbortController();
+  const timeoutId = setTimeout(() => controller.abort(), timeout);
+
+  try {
+    // Normalize URL
+    const trimmed = websiteUrl.trim();
+    const url = /^https?:\/\//i.test(trimmed) ? trimmed : `https://${trimmed}`;
+
+    // NOTE(review): the server fetches whatever host the caller names. The
+    // route sits on the protected router, but consider rejecting private or
+    // loopback hosts as well.
+    const response = await fetch(url, {
+      signal: controller.signal,
+      headers: {
+        "User-Agent": USER_AGENT,
+        Accept: "text/html,application/xhtml+xml",
+      },
+    });
+
+    if (!response.ok) {
+      return { success: false, error: `HTTP ${response.status}`, feeds: [] };
+    }
+
+    const html = await response.text();
+
+    // Resolve against the URL actually served, which differs after redirects
+    const baseUrl = new URL(response.url || url);
+
+    let feeds = findLinkedFeeds(html, baseUrl);
+    if (feeds.length === 0) {
+      feeds = await probeCommonPaths(baseUrl, controller.signal);
+    }
+
+    return {
+      success: true,
+      feeds,
+      pageTitle: extractPageTitle(html),
+      siteUrl: baseUrl.origin,
+    };
+  } catch (error) {
+    if (error.name === "AbortError") {
+      return { success: false, error: "Request timed out", feeds: [] };
+    }
+    return { success: false, error: error.message, feeds: [] };
+  } finally {
+    clearTimeout(timeoutId);
+  }
+}
diff --git a/locales/en.json b/locales/en.json
index 75656e5..f826078 100644
--- a/locales/en.json
+++ b/locales/en.json
@@ -91,6 +91,17 @@
     "deleted": "Blog deleted successfully.",
     "refreshed": "Blog refreshed. Added %{items} new items.",
     "form": {
+      "discoverUrl": "Website URL",
+      "discover": "Discover Feed",
+      "discoverHint": "Enter a website URL to auto-discover its RSS/Atom feed",
+      "discoverNoUrl": "Please enter a website URL",
+      "discovering": "Discovering...",
+      "discoveringHint": "Checking for RSS/Atom feeds...",
+      "discoverFailed": "Failed to discover feeds",
+      "discoverNoFeeds": "No feeds found on this website",
+      "discoverFoundOne": "Found feed:",
+      "discoverFoundMultiple": "Multiple feeds found. Click one to select:",
+      "discoverSelected": "Selected feed:",
       "feedUrl": "Feed URL",
       "feedUrlHint": "RSS, Atom, or JSON Feed URL",
       "title": "Title",
diff --git a/package.json b/package.json
index f0253b0..f8280f4 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@rmdes/indiekit-endpoint-blogroll",
-  "version": "1.0.3",
+  "version": "1.0.4",
   "description": "Blogroll endpoint for Indiekit. Aggregates blog feeds from OPML, JSON feeds, or manual entry.",
   "keywords": [
     "indiekit",
diff --git a/views/blogroll-blog-edit.njk b/views/blogroll-blog-edit.njk
index b30a555..0777804 100644
--- a/views/blogroll-blog-edit.njk
+++ b/views/blogroll-blog-edit.njk
@@ -107,6 +107,93 @@
     text-align: center;
     padding: var(--space-m, 1rem);
   }
+
+  .br-discover-section {
+    background: var(--color-offset, #f5f5f5);
+    border-radius: var(--border-radius-small, 0.5rem);
+    padding: var(--space-m, 1rem);
+    margin-block-end: var(--space-m, 1rem);
+  }
+
+  .br-discover-section .br-field {
+    margin-block-end: var(--space-s, 0.75rem);
+  }
+
+  .br-discover-input {
+    display: flex;
+    gap: var(--space-s, 0.75rem);
+  }
+
+  .br-discover-input input {
+    flex: 1;
+    appearance: none;
+    background-color: var(--color-background, #fff);
+    border: 1px solid var(--color-outline-variant, #ccc);
+    border-radius: var(--border-radius-small, 0.25rem);
+    font: var(--font-body, 0.875rem/1.4 sans-serif);
+    padding: calc(var(--space-s, 0.75rem) / 2) var(--space-s, 0.75rem);
+  }
+
+  .br-discover-result {
+    margin-block-start: var(--space-s, 0.75rem);
+    padding: var(--space-s, 0.75rem);
+    background: var(--color-background, #fff);
+    border-radius: var(--border-radius-small, 0.25rem);
+    font: var(--font-caption, 0.875rem/1.4 sans-serif);
+  }
+
+  .br-discover-result.br-discover-result--error {
+    color: var(--color-error, #dc3545);
+  }
+
+  .br-discover-result.br-discover-result--success {
+    color: var(--color-success, #28a745);
+  }
+
+  .br-discover-feeds {
+    list-style: none;
+    padding: 0;
+    margin: var(--space-xs, 0.5rem) 0 0 0;
+    display: flex;
+    flex-direction: column;
+    gap: var(--space-xs, 0.5rem);
+  }
+
+  .br-discover-feed {
+    display: flex;
+    align-items: center;
+    gap: var(--space-s, 0.75rem);
+    padding: var(--space-xs, 0.5rem);
+    background: var(--color-offset, #f5f5f5);
+    border-radius: var(--border-radius-small, 0.25rem);
+    cursor: pointer;
+  }
+
+  .br-discover-feed:hover {
+    background: var(--color-primary-offset, #e6f0ff);
+  }
+
+  .br-discover-feed-url {
+    flex: 1;
+    font-family: monospace;
+    font-size: 0.75rem;
+    word-break: break-all;
+  }
+
+  .br-discover-feed-type {
+    background: var(--color-primary, #0066cc);
+    color: white;
+    padding: 0.125rem 0.5rem;
+    border-radius: 0.25rem;
+    font-size: 0.625rem;
+    text-transform: uppercase;
+  }
+
+  .br-divider {
+    border: none;
+    border-block-start: 1px solid var(--color-outline-variant, #ddd);
+    margin: var(--space-m, 1rem) 0;
+  }