feat: delete stale items older than 30 days

Stripped dedup skeletons and unread items older than 30 days are now
hard-deleted on startup. Previously, stripped skeletons accumulated
forever and unread items had no expiry, causing unbounded collection
growth.

Confab-Link: http://localhost:8080/sessions/4d40ef89-a713-48c1-b4ed-0ffafca25677
This commit is contained in:
Ricardo
2026-03-05 22:52:59 +01:00
parent 4e97bad7d9
commit a51b554068
3 changed files with 72 additions and 2 deletions

View File

@@ -9,7 +9,11 @@ import { readerController } from "./lib/controllers/reader.js";
import { handleMediaProxy } from "./lib/media/proxy.js";
import { startScheduler, stopScheduler } from "./lib/polling/scheduler.js";
import { ensureActivityPubChannel } from "./lib/storage/channels.js";
import { cleanupAllReadItems, createIndexes } from "./lib/storage/items.js";
import {
cleanupAllReadItems,
cleanupStaleItems,
createIndexes,
} from "./lib/storage/items.js";
import { webmentionReceiver } from "./lib/webmention/receiver.js";
import { websubHandler } from "./lib/websub/handler.js";
@@ -210,6 +214,11 @@ export default class MicrosubEndpoint {
cleanupAllReadItems(indiekit).catch((error) => {
console.warn("[Microsub] Startup cleanup failed:", error.message);
});
// Delete stale items (stripped skeletons + unread older than 30 days)
cleanupStaleItems(indiekit).catch((error) => {
console.warn("[Microsub] Stale cleanup failed:", error.message);
});
} else {
console.warn(
"[Microsub] Database not available at init, scheduler not started",

View File

@@ -387,6 +387,10 @@ export async function countReadItems(application, channelId, userId) {
// uid, readBy) so the poller doesn't re-ingest them as new unread entries.
const MAX_FULL_READ_ITEMS = 200;
// Maximum age (in days) for stripped skeletons and unread items.
// After this period, both are hard-deleted to prevent unbounded growth.
// Used by cleanupStaleItems() to compute its cutoff date; an item's age is
// judged by `published`, falling back to `createdAt` when `published` is absent.
const MAX_ITEM_AGE_DAYS = 30;
/**
* Cleanup old read items by stripping content but preserving dedup skeletons.
* This prevents the vicious cycle where deleted read items get re-ingested as
@@ -575,6 +579,63 @@ export async function cleanupAllReadItems(application) {
return totalCleaned;
}
/**
 * Delete stale items: stripped skeletons and unread items older than MAX_ITEM_AGE_DAYS.
 * Stripped skeletons have served their dedup purpose; stale unread items are unlikely
 * to be read. Both are hard-deleted to prevent unbounded collection growth.
 *
 * An item counts as "older than the cutoff" when its `published` value is before the
 * cutoff, or when it has no `published` field and its `createdAt` is before the cutoff.
 * Items that have neither field are never matched and therefore never deleted here.
 *
 * An item counts as "unread" when `readBy` is missing, null, or an empty array.
 *
 * @param {object} application - Indiekit application
 * @returns {Promise<number>} Total number of items deleted
 */
export async function cleanupStaleItems(application) {
  const collection = getCollection(application);

  const cutoff = new Date();
  cutoff.setDate(cutoff.getDate() - MAX_ITEM_AGE_DAYS);

  // Shared age filter. `published` is compared against a Date while `createdAt`
  // is compared against an ISO-8601 string.
  // NOTE(review): this presumes `published` is stored as a BSON Date and
  // `createdAt` as an ISO string — confirm against the code that writes items.
  const olderThanCutoff = [
    { published: { $lt: cutoff } },
    { published: { $exists: false }, createdAt: { $lt: cutoff.toISOString() } },
  ];

  // Delete stripped skeletons older than cutoff
  const strippedResult = await collection.deleteMany({
    _stripped: true,
    $or: olderThanCutoff,
  });

  // Delete unread items older than cutoff. Missing, null and empty `readBy`
  // are handled in a single round trip; two $or groups must be combined with
  // $and because a filter document can only carry one top-level $or key.
  const unreadResult = await collection.deleteMany({
    _stripped: { $ne: true },
    $and: [
      {
        $or: [{ readBy: { $exists: false } }, { readBy: { $in: [null, []] } }],
      },
      { $or: olderThanCutoff },
    ],
  });

  const total = strippedResult.deletedCount + unreadResult.deletedCount;

  if (total > 0) {
    console.info(
      `[Microsub] Stale cleanup: deleted ${strippedResult.deletedCount} stripped skeletons, ${unreadResult.deletedCount} stale unread items (cutoff: ${MAX_ITEM_AGE_DAYS} days)`,
    );
  }

  return total;
}
export async function markItemsRead(application, channelId, entryIds, userId) {
const collection = getCollection(application);
const channelObjectId =

View File

@@ -1,6 +1,6 @@
{
"name": "@rmdes/indiekit-endpoint-microsub",
"version": "1.0.42",
"version": "1.0.43",
"description": "Microsub endpoint for Indiekit. Enables subscribing to feeds and reading content using the Microsub protocol.",
"keywords": [
"indiekit",