fix(og): strip markdown tables, lists, and non-renderable chars from body text

extractBodyText() was too naive - markdown tables (|...|), heading anchors
({#id}), list numbering, and HTML tags leaked into OG image titles for
notes without explicit titles. Characters outside Inter font coverage
caused Satori to render "NO GLYPH" vertically on the card.

Confab-Link: http://localhost:8080/sessions/5565387e-4eb5-4441-89fb-2c6347de8e0c
This commit is contained in:
Ricardo
2026-03-10 19:51:50 +01:00
parent 0fe99ee5b1
commit b3b65bf891

View File

@@ -121,14 +121,35 @@ function truncateTitle(title, max = 120) {
/**
 * Derive a short plain-text snippet from raw markdown for use as an OG image
 * title when a note has no explicit title.
 *
 * Markdown/HTML syntax is removed in a deliberate order:
 *   - images before links, because the link pattern also matches the
 *     `[alt](url)` tail of an image and would leave `!alt` behind;
 *   - heading anchors and HTML tags before the single-character marker strip,
 *     because that strip removes the `#` and `>` those patterns depend on;
 *   - every line-anchored pattern (tables, bullets, numbering, rules) before
 *     whitespace is collapsed, because they rely on `^`/`$` with the `m` flag.
 *
 * @param {string} raw - Raw note source (markdown, optionally with frontmatter).
 *   Non-string input is treated as empty.
 * @returns {string} Plain text of at most 120 characters (plus a trailing
 *   ellipsis when truncated), or "Untitled" when nothing is left.
 */
function extractBodyText(raw) {
  const maxLength = 120;
  const source = typeof raw === "string" ? raw : "";

  const body = source
    // Strip frontmatter
    .replace(/^---[\s\S]*?---\s*/, "")
    // Strip images ![alt](url) - must run before the link pattern below
    .replace(/!\[[^\]]*\]\([^)]+\)/g, "")
    // Strip markdown tables (lines with pipes)
    .replace(/^\|.*\|$/gm, "")
    // Strip table separator rows (|---|---|)
    .replace(/^\s*[-|: ]+$/gm, "")
    // Strip heading anchors {#id}
    .replace(/\{#[^}]+\}/g, "")
    // Strip HTML tags
    .replace(/<[^>]+>/g, "")
    // Strip markdown links, keep text
    .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1")
    // Strip heading markers, bold, italic, strikethrough, code, blockquote
    .replace(/[#*_~`>]/g, "")
    // Strip list bullets (-, *, +) and numbered lists (1.)
    .replace(/^\s*[-*+]\s+/gm, "")
    .replace(/^\s*\d+\.\s+/gm, "")
    // Strip horizontal rules
    .replace(/^-{3,}$/gm, "")
    // Collapse all whitespace (newlines, tabs, multiple spaces)
    .replace(/\s+/g, " ")
    .trim();

  if (!body) return "Untitled";

  // Keep only printable ASCII, Latin-1/Extended Latin and general punctuation;
  // anything else (CJK, emoji, ...) is dropped so it cannot show up as
  // "NO GLYPH" on the card. Collapse again because removing a character from
  // between two spaces leaves a double space.
  const safe = body
    .replace(/[^\x20-\x7E\u00A0-\u024F\u2010-\u2027\u2030-\u205E]/g, "")
    .replace(/\s+/g, " ")
    .trim();

  // Best effort: if sanitizing removed everything, fall back to the
  // unsanitized text rather than returning an empty title.
  const text = safe || body;

  return text.length > maxLength
    ? text.slice(0, maxLength).trim() + "\u2026"
    : text;
}
function buildCard(title, dateStr, postType, siteName) {