From a51b5540682f16e2e895be408f4d6d313716c0e7 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Thu, 5 Mar 2026 22:52:59 +0100 Subject: [PATCH] feat: delete stale items older than 30 days Stripped dedup skeletons and unread items older than 30 days are now hard-deleted on startup. Previously, stripped skeletons accumulated forever and unread items had no expiry, causing unbounded collection growth. Confab-Link: http://localhost:8080/sessions/4d40ef89-a713-48c1-b4ed-0ffafca25677 --- index.js | 11 +++++++- lib/storage/items.js | 61 ++++++++++++++++++++++++++++++++++++++++++++ package.json | 2 +- 3 files changed, 72 insertions(+), 2 deletions(-) diff --git a/index.js b/index.js index 72d8882..127df93 100644 --- a/index.js +++ b/index.js @@ -9,7 +9,11 @@ import { readerController } from "./lib/controllers/reader.js"; import { handleMediaProxy } from "./lib/media/proxy.js"; import { startScheduler, stopScheduler } from "./lib/polling/scheduler.js"; import { ensureActivityPubChannel } from "./lib/storage/channels.js"; -import { cleanupAllReadItems, createIndexes } from "./lib/storage/items.js"; +import { + cleanupAllReadItems, + cleanupStaleItems, + createIndexes, +} from "./lib/storage/items.js"; import { webmentionReceiver } from "./lib/webmention/receiver.js"; import { websubHandler } from "./lib/websub/handler.js"; @@ -210,6 +214,11 @@ export default class MicrosubEndpoint { cleanupAllReadItems(indiekit).catch((error) => { console.warn("[Microsub] Startup cleanup failed:", error.message); }); + + // Delete stale items (stripped skeletons + unread older than 30 days) + cleanupStaleItems(indiekit).catch((error) => { + console.warn("[Microsub] Stale cleanup failed:", error.message); + }); } else { console.warn( "[Microsub] Database not available at init, scheduler not started", diff --git a/lib/storage/items.js b/lib/storage/items.js index 31369e1..27cbb8a 100644 --- a/lib/storage/items.js +++ b/lib/storage/items.js @@ -387,6 +387,10 @@ export async function 
// Maximum age (in days) for stripped skeletons and unread items.
// After this period, both are hard-deleted to prevent unbounded growth.
const MAX_ITEM_AGE_DAYS = 30;

/**
 * Hard-delete stale items from the items collection.
 *
 * Two disjoint groups are removed once they are older than the cutoff:
 *  - stripped skeletons (`_stripped: true`);
 *  - unread items (`_stripped` is not true and `readBy` is missing, null or
 *    an empty array).
 *
 * An item counts as "older than the cutoff" when its `published` value is
 * before the cutoff or, if it has no `published` field, when its `createdAt`
 * value is before the cutoff.
 *
 * NOTE(review): `published` is compared against a Date while `createdAt` is
 * compared against an ISO-8601 string. This presumably mirrors how the two
 * fields are stored (MongoDB only compares values of the same type), but the
 * write path is not visible here — confirm against the ingest code.
 * @param {object} application - Indiekit application
 * @param {object} [options] - Optional overrides
 * @param {number} [options.maxAgeDays] - Age threshold in days (defaults to
 *   MAX_ITEM_AGE_DAYS); must be a positive finite number
 * @param {Date} [options.now] - Reference time the cutoff is measured from
 *   (defaults to the current time)
 * @returns {Promise<number>} Total number of items deleted
 * @throws {RangeError} If `maxAgeDays` is not a positive finite number or
 *   `now` is an invalid date; nothing is deleted in that case
 */
export async function cleanupStaleItems(
  application,
  { maxAgeDays = MAX_ITEM_AGE_DAYS, now = new Date() } = {},
) {
  // Deletion is irreversible, so refuse to build a cutoff from a bad age.
  if (!Number.isFinite(maxAgeDays) || maxAgeDays <= 0) {
    throw new RangeError(
      `maxAgeDays must be a positive number, got ${maxAgeDays}`,
    );
  }

  // Fixed-length days keep the cutoff independent of the server's time zone
  // and of daylight-saving transitions (unlike Date#setDate).
  const millisecondsPerDay = 24 * 60 * 60 * 1000;
  const cutoff = new Date(now.getTime() - maxAgeDays * millisecondsPerDay);
  // toISOString() throws RangeError for an invalid date, before any delete.
  const cutoffIso = cutoff.toISOString();

  const collection = getCollection(application);

  const olderThanCutoff = [
    { published: { $lt: cutoff } },
    { published: { $exists: false }, createdAt: { $lt: cutoffIso } },
  ];

  // "Unread" spelled out in full so a single query covers a missing, null or
  // empty `readBy` without relying on implicit null-matches-missing rules.
  const isUnread = [
    { readBy: { $exists: false } },
    { readBy: { $in: [null, []] } },
  ];

  // The two filters are mutually exclusive on `_stripped`, so the deletes are
  // independent and can run concurrently.
  const [strippedResult, unreadResult] = await Promise.all([
    collection.deleteMany({ _stripped: true, $or: olderThanCutoff }),
    collection.deleteMany({
      _stripped: { $ne: true },
      $and: [{ $or: isUnread }, { $or: olderThanCutoff }],
    }),
  ]);

  const total = strippedResult.deletedCount + unreadResult.deletedCount;

  if (total > 0) {
    console.info(
      `[Microsub] Stale cleanup: deleted ${strippedResult.deletedCount} stripped skeletons, ` +
        `${unreadResult.deletedCount} stale unread items ` +
        `(cutoff: ${maxAgeDays} days)`,
    );
  }

  return total;
}