/**
 * Hostnames that must never be fetched (SSRF protection).
 */
const BLOCKED_HOSTNAMES = new Set(["localhost", "0.0.0.0"]);

/**
 * Dotted-decimal prefixes of IPv4 ranges that must never be fetched.
 * Ranges that do not fall on an octet boundary (172.16.0.0/12 and
 * 100.64.0.0/10) are checked numerically in isBlockedIpv4Host().
 */
const BLOCKED_IP_PREFIXES = [
  "127.", // Loopback
  "10.", // Private Class A
  "192.168.", // Private Class C
  "169.254.", // Link-local
  "0.", // Current network
];

/**
 * Check a hostname against the blocked IPv4 ranges.
 * The match is deliberately textual and conservative: any hostname that merely
 * starts with a blocked prefix is rejected, not only literal IPv4 addresses.
 * @param {string} hostname - Lower-cased hostname without a trailing dot
 * @returns {boolean} True if the hostname falls in a blocked IPv4 range
 */
function isBlockedIpv4Host(hostname) {
  if (BLOCKED_IP_PREFIXES.some((prefix) => hostname.startsWith(prefix))) {
    return true;
  }

  // 172.16.0.0/12 (172.16.x.x - 172.31.x.x)
  const match172 = hostname.match(/^172\.(\d+)\./);
  if (match172) {
    const second = Number.parseInt(match172[1], 10);
    if (second >= 16 && second <= 31) {
      return true;
    }
  }

  // 100.64.0.0/10 (100.64.x.x - 100.127.x.x), carrier-grade NAT shared space
  const match100 = hostname.match(/^100\.(\d+)\./);
  if (match100) {
    const second = Number.parseInt(match100[1], 10);
    if (second >= 64 && second <= 127) {
      return true;
    }
  }

  return false;
}

/**
 * Check an IPv6 literal against loopback, unspecified, unique-local,
 * link-local and IPv4-mapped private addresses.
 * @param {string} address - IPv6 address as serialized by URL, without brackets
 * @returns {boolean} True if the address is private/internal
 */
function isBlockedIpv6Address(address) {
  // Loopback and the unspecified address
  if (address === "::1" || address === "::") {
    return true;
  }

  // fc00::/7 unique-local and fe80::/10 link-local
  if (/^f[cd][\da-f]{2}:/.test(address) || /^fe[89ab][\da-f]:/.test(address)) {
    return true;
  }

  // IPv4-mapped addresses: the URL parser serializes [::ffff:127.0.0.1] as
  // ::ffff:7f00:1, so rebuild the dotted form and reuse the IPv4 checks.
  const mapped = address.match(/^::ffff:([\da-f]{1,4}):([\da-f]{1,4})$/);
  if (mapped) {
    const high = Number.parseInt(mapped[1], 16);
    const low = Number.parseInt(mapped[2], 16);
    const dotted = [high >> 8, high & 0xff, low >> 8, low & 0xff].join(".");
    return isBlockedIpv4Host(dotted);
  }

  return false;
}

/**
 * Check if a URL's host is literally a private/internal address.
 *
 * This is a lexical check on the parsed hostname only; no DNS lookup is
 * performed, so a public name that resolves to an internal address is not
 * detected here.
 * @param {string} urlString - URL to check
 * @returns {boolean} True if the URL targets a private/internal address,
 *   or if the URL cannot be parsed
 */
export function isPrivateUrl(urlString) {
  let hostname;
  try {
    hostname = new URL(urlString).hostname;
  } catch {
    return true; // Invalid URLs are blocked
  }

  // Normalize case and drop the trailing root dot ("localhost." === "localhost")
  hostname = hostname.toLowerCase().replace(/\.+$/, "");

  // Block known private hostnames, including any *.localhost name
  if (BLOCKED_HOSTNAMES.has(hostname) || hostname.endsWith(".localhost")) {
    return true;
  }

  // URL.hostname keeps the square brackets around IPv6 literals
  if (hostname.startsWith("[") && hostname.endsWith("]")) {
    return isBlockedIpv6Address(hostname.slice(1, -1));
  }

  return isBlockedIpv4Host(hostname);
}
cacheKey = `media:${hashUrl(url)}`; // Try cache first @@ -194,6 +254,11 @@ export async function handleMediaProxy(request, response) { return response.status(400).send("Invalid URL"); } + // Block requests to private/internal networks (SSRF protection) + if (isPrivateUrl(url)) { + return response.status(403).send("URL not allowed"); + } + // Get Redis client from application const { application } = request.app.locals; const redis = application.redis; @@ -202,8 +267,7 @@ export async function handleMediaProxy(request, response) { const imageData = await fetchImage(redis, url); if (!imageData) { - // Redirect to original URL as fallback - return response.redirect(url); + return response.status(404).send("Image not available"); } // Set cache headers diff --git a/lib/storage/items.js b/lib/storage/items.js index 5f46773..54493b1 100644 --- a/lib/storage/items.js +++ b/lib/storage/items.js @@ -602,7 +602,11 @@ export async function searchItems(application, channelId, query, limit = 20) { typeof channelId === "string" ? 
new ObjectId(channelId) : channelId; // Use regex search (consider adding text index for better performance) - const regex = new RegExp(query, "i"); + const escapedQuery = query.replaceAll( + /[$()*+.?[\\\]^{|}]/g, + String.raw`\$&`, + ); + const regex = new RegExp(escapedQuery, "i"); const items = await collection .find({ channelId: objectId, diff --git a/lib/webmention/verifier.js b/lib/webmention/verifier.js index 5296d1c..f4fa377 100644 --- a/lib/webmention/verifier.js +++ b/lib/webmention/verifier.js @@ -4,6 +4,29 @@ */ import { mf2 } from "microformats-parser"; +import sanitizeHtml from "sanitize-html"; + +/** + * Sanitize HTML options (matches normalizer.js) + */ +const SANITIZE_OPTIONS = { + allowedTags: [ + "a", "abbr", "b", "blockquote", "br", "code", "em", "figcaption", + "figure", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "img", + "li", "ol", "p", "pre", "s", "span", "strike", "strong", "sub", + "sup", "table", "tbody", "td", "th", "thead", "tr", "u", "ul", + "video", "audio", "source", + ], + allowedAttributes: { + a: ["href", "title", "rel"], + img: ["src", "alt", "title", "width", "height"], + video: ["src", "poster", "controls", "width", "height"], + audio: ["src", "controls"], + source: ["src", "type"], + "*": ["class"], + }, + allowedSchemes: ["http", "https", "mailto"], +}; /** * Verify a webmention @@ -276,7 +299,7 @@ function extractContent(entry) { return { text: content.value, - html: content.html, + html: content.html ? sanitizeHtml(content.html, SANITIZE_OPTIONS) : undefined, }; }