Files
indiekit-blog/lib/unfurl-shortcode.js
svemagie f6f7cac403 fix: harden unfurl timeout + persist CI fetch cache
- Add hard 22s Promise.race deadline in prefetchUrl() to guard against
  TCP-level connection hangs that bypass unfurl.js's read-only timeout.
  Fixes builds hanging indefinitely on unresponsive hosts.

- Add actions/cache step to deploy.yml persisting .cache/ between runs.
  Prevents webmention (and all eleventy-fetch) data loss on transient
  502s: a populated cache means failures return existing data, not [].

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-19 22:28:40 +01:00

180 lines
5.9 KiB
JavaScript

import { unfurl } from "unfurl.js";
import { readFileSync, writeFileSync, mkdirSync, existsSync } from "fs";
import { resolve } from "path";
import { createHash } from "crypto";
// Directory that holds the cached unfurl results: "<this module's directory>/../.cache/unfurl".
const CACHE_DIR = resolve(import.meta.dirname, "..", ".cache", "unfurl");
// Lifetime of a cache entry that recorded a successful fetch.
const CACHE_DURATION_MS = 7 * 24 * 60 * 60 * 1000; // 1 week
// Shorter lifetime for entries that recorded a failed fetch, so failures are retried sooner.
const FAILURE_CACHE_MS = 24 * 60 * 60 * 1000; // 1 day for failed fetches
// User-Agent header sent with every unfurl request.
const USER_AGENT = "Mozilla/5.0 (compatible; Indiekit/1.0; +https://getindiekit.com)";
// Concurrency limiter — prevents overwhelming outbound network
let activeRequests = 0; // number of tasks currently running
const MAX_CONCURRENT = 5; // upper bound on simultaneously running tasks
const queue = []; // pending { resolve, reject, fn } entries, served first-in first-out

/**
 * Start the next queued task if one is waiting and a slot is free.
 * When the task settles, its slot is released and the queue is polled again.
 */
function runNext() {
  if (queue.length === 0 || activeRequests >= MAX_CONCURRENT) return;
  activeRequests++;
  const { resolve: res, reject: rej, fn } = queue.shift();
  // Invoking fn inside a Promise executor turns a synchronous throw or a
  // non-promise return value into an ordinary settled promise, so the slot
  // taken above is always released by the finally handler below.
  new Promise((settle) => settle(fn()))
    // Forward both outcomes to the caller; forwarding only the fulfilment
    // would leave the caller pending forever when the task fails.
    .then(res, rej)
    .finally(() => {
      activeRequests--;
      runNext();
    });
}

/**
 * Run `fn` through the limiter so that at most MAX_CONCURRENT tasks are in
 * flight at once.
 *
 * @param {() => (Promise<*>|*)} fn - Task to run once a slot is available.
 * @returns {Promise<*>} Settles with the task's result, or rejects with the
 *   error the task threw or rejected with.
 */
function throttled(fn) {
  return new Promise((res, rej) => {
    queue.push({ resolve: res, reject: rej, fn });
    runNext();
  });
}
/**
 * Map a URL to the path of its cache file inside CACHE_DIR.
 * The file name is the hex MD5 digest of the URL followed by ".json".
 *
 * @param {string} url - URL used as the cache key.
 * @returns {string} Absolute path of the cache file for that URL.
 */
function getCachePath(url) {
  const hasher = createHash("md5");
  hasher.update(url);
  const fileName = hasher.digest("hex") + ".json";
  return resolve(CACHE_DIR, fileName);
}
/**
 * Look up the disk cache entry for a URL.
 *
 * @param {string} url - URL used as the cache key.
 * @returns {object|undefined} The full parsed entry (including its `failed`
 *   flag) while it is still fresh; `undefined` when there is no cache file,
 *   the file cannot be read or parsed, or the entry is older than its
 *   lifetime (FAILURE_CACHE_MS for failed entries, CACHE_DURATION_MS otherwise).
 */
export function readCache(url) {
  const file = getCachePath(url);
  if (!existsSync(file)) {
    return undefined; // undefined = not cached
  }

  try {
    const entry = JSON.parse(readFileSync(file, "utf-8"));
    const elapsed = Date.now() - entry.cachedAt;
    const maxAge = entry.failed ? FAILURE_CACHE_MS : CACHE_DURATION_MS;
    // Written as a negated "<" so a missing timestamp (NaN age) counts as expired.
    const expired = !(elapsed < maxAge);
    return expired ? undefined : entry;
  } catch {
    // Unreadable or corrupt cache file: treat it as not cached.
    return undefined;
  }
}
/**
 * Store the outcome of a fetch in the disk cache, creating CACHE_DIR if needed.
 *
 * @param {string} url - URL used as the cache key.
 * @param {object|null} metadata - Metadata to store for the URL.
 * @param {boolean} [failed=false] - Flag stored with the entry to mark a failed fetch.
 */
function writeCache(url, metadata, failed = false) {
  mkdirSync(CACHE_DIR, { recursive: true });
  const target = getCachePath(url);
  const entry = { cachedAt: Date.now(), metadata, failed };
  const serialized = JSON.stringify(entry);
  writeFileSync(target, serialized);
}
/**
 * Get the host name of a URL without a leading "www." label.
 *
 * @param {string} url - URL to inspect.
 * @returns {string} The host name, or `url` itself when it cannot be parsed as a URL.
 */
export function extractDomain(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    // Not a parseable URL: hand the input back unchanged.
    return url;
  }
  const { hostname } = parsed;
  return hostname.startsWith("www.") ? hostname.slice("www.".length) : hostname;
}
/**
 * Escape a value for safe inclusion in HTML text or a quoted attribute value.
 *
 * Replaces `&`, `<`, `>`, `"` and `'`. The single quote is included so the
 * result is also safe inside single-quoted attributes.
 *
 * @param {*} str - Value to escape. Falsy values yield an empty string; other
 *   non-string values are converted with `String()` first.
 * @returns {string} The escaped text.
 */
export function escapeHtml(str) {
  if (!str) return "";
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  // One pass over the input, so an "&" introduced by an earlier replacement
  // can never be escaped a second time.
  return String(str).replace(/[&<>"']/g, (ch) => entities[ch]);
}
/**
 * Render the plain link used when no preview card is available for a URL.
 * The link text is the URL's domain; it opens in a new tab with rel="noopener".
 *
 * @param {string} url - Link target.
 * @returns {string} HTML markup for the anchor element.
 */
export function renderFallbackLink(url) {
  const label = escapeHtml(extractDomain(url));
  const href = escapeHtml(url);
  return `<a href="${href}" rel="noopener" target="_blank">${label}</a>`;
}
/**
 * Render the preview card for a URL from its unfurl metadata.
 *
 * Title and description are taken from `open_graph` first, then
 * `twitter_card`, then the top-level metadata fields; the title finally falls
 * back to the URL's domain. The image is the first Open Graph image, else the
 * first Twitter card image. Descriptions longer than 160 UTF-16 code units are
 * cut and suffixed with an ellipsis.
 *
 * @param {string} url - Link target; also the source of the displayed domain.
 * @param {object|null} [metadata] - Metadata object with optional `open_graph`,
 *   `twitter_card`, `title`, `description` and `favicon` fields. A null or
 *   undefined value is treated as empty metadata.
 * @returns {string} HTML markup for the card.
 */
export function renderCard(url, metadata) {
  // Tolerate an entry without metadata instead of throwing on property access.
  const meta = metadata ?? {};
  const og = meta.open_graph || {};
  const tc = meta.twitter_card || {};
  const title = og.title || tc.title || meta.title || extractDomain(url);
  // String methods are applied below, so make sure this really is a string.
  const description = String(og.description || tc.description || meta.description || "");
  const image = og.images?.[0]?.url || tc.images?.[0]?.url || null;
  const favicon = meta.favicon || null;
  const domain = extractDomain(url);

  const maxDesc = 160;
  let desc = description;
  if (description.length > maxDesc) {
    let cut = description.slice(0, maxDesc);
    // slice() counts UTF-16 code units, so the cut can land between the two
    // halves of a surrogate pair; drop a dangling high surrogate so the
    // output stays well-formed.
    const lastUnit = cut.charCodeAt(cut.length - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      cut = cut.slice(0, -1);
    }
    desc = `${cut.trim()}\u2026`;
  }

  const imgHtml = image
    ? [
        `<div class="unfurl-card-image shrink-0">`,
        `<img src="${escapeHtml(image)}" alt="" loading="lazy" decoding="async"`,
        `class="w-24 h-24 sm:w-32 sm:h-32 object-cover rounded-r-lg" />`,
        `</div>`,
      ].join("\n")
    : "";

  const faviconHtml = favicon
    ? `<img src="${escapeHtml(favicon)}" alt="" class="inline-block w-4 h-4 mr-1 align-text-bottom" loading="lazy" />`
    : "";

  const descHtml = desc
    ? `<p class="text-xs sm:text-sm text-surface-600 dark:text-surface-400 mt-1 m-0 line-clamp-2">${escapeHtml(desc)}</p>`
    : "";

  // One array element per output line; empty optional parts still occupy a line.
  return [
    `<div class="unfurl-card not-prose my-4 rounded-lg border border-surface-200 dark:border-surface-700 bg-surface-50 dark:bg-surface-800 overflow-hidden hover:border-accent-300 dark:hover:border-accent-600 transition-colors">`,
    `<a href="${escapeHtml(url)}" rel="noopener" target="_blank" class="flex no-underline text-inherit hover:text-inherit">`,
    `<div class="flex-1 p-3 sm:p-4 min-w-0">`,
    `<p class="font-semibold text-sm sm:text-base text-surface-900 dark:text-surface-100 truncate m-0">${escapeHtml(title)}</p>`,
    descHtml,
    `<p class="text-xs text-surface-600 dark:text-surface-400 mt-2 m-0">${faviconHtml}${escapeHtml(domain)}</p>`,
    `</div>`,
    imgHtml,
    `</a>`,
    `</div>`,
  ].join("\n");
}
/**
 * Fetch unfurl metadata for a URL and populate the disk cache.
 *
 * A fresh cache entry is used as-is without any network access. Otherwise the
 * fetch runs through the concurrency limiter, and its outcome — success or
 * failure — is written to the cache before rendering.
 *
 * @param {string} url - URL to unfurl.
 * @returns {Promise<string>} The rendered HTML card, the fallback link when
 *   the fetch failed (now or in a cached attempt), or "" for an empty URL.
 */
export async function prefetchUrl(url) {
  if (!url) return "";

  // Already cached — skip network fetch
  const cached = readCache(url);
  if (cached !== undefined) {
    return cached.failed ? renderFallbackLink(url) : renderCard(url, cached.metadata);
  }

  const metadata = await throttled(async () => {
    let deadlineTimer;
    try {
      // Hard outer deadline guards against TCP-level hangs that bypass unfurl's
      // own timeout (which only covers HTTP read, not connection establishment).
      const deadline = new Promise((_, reject) => {
        deadlineTimer = setTimeout(() => reject(new Error("hard deadline 22s")), 22000);
      });
      return await Promise.race([
        unfurl(url, { timeout: 18000, headers: { "User-Agent": USER_AGENT } }),
        deadline,
      ]);
    } catch (err) {
      // Best effort: a failed fetch is logged and reported as "no metadata".
      console.warn(`[unfurl] Failed to fetch ${url}: ${err?.message ?? err}`);
      return null;
    } finally {
      // Without this, every fetch would leave a live 22 s timer behind that
      // keeps the event loop busy long after the request has finished.
      clearTimeout(deadlineTimer);
    }
  });

  if (!metadata) {
    // Remember the failure so the URL is not refetched until the entry expires.
    writeCache(url, null, true);
    return renderFallbackLink(url);
  }

  writeCache(url, metadata, false);
  return renderCard(url, metadata);
}
/**
 * Synchronous, cache-only rendering of a URL; never touches the network.
 * Returns the rendered card HTML when a successful entry is cached, the
 * fallback link when the URL is not cached or is cached as failed, and an
 * empty string when no URL is given.
 * Safe to use in deeply nested Nunjucks includes where async isn't supported.
 */
export function getCachedCard(url) {
  if (!url) {
    return "";
  }
  const entry = readCache(url);
  const hasUsableEntry = entry !== undefined && !entry.failed;
  return hasUsableEntry ? renderCard(url, entry.metadata) : renderFallbackLink(url);
}
/**
 * Register the {% unfurl "URL" %} shortcode on an Eleventy config.
 * The shortcode is asynchronous and resolves to whatever prefetchUrl()
 * returns for the given URL.
 */
export default function registerUnfurlShortcode(eleventyConfig) {
  const renderUnfurl = async (url) => prefetchUrl(url);
  eleventyConfig.addAsyncShortcode("unfurl", renderUnfurl);
}