From 6bddf61c04fef0b134ad88bfd9a21a1b51160a6f Mon Sep 17 00:00:00 2001
From: svemagie <869694+svemagie@users.noreply.github.com>
Date: Thu, 19 Mar 2026 14:59:41 +0100
Subject: [PATCH] chore: block AI scrapers in robots.txt

Block known AI training bots (GPTBot, ClaudeBot, CCBot, etc.) from crawling the site.

Co-Authored-By: Claude Sonnet 4.6
---
 robots.txt | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/robots.txt b/robots.txt
index 668f96b..3b3624b 100644
--- a/robots.txt
+++ b/robots.txt
@@ -1,5 +1,22 @@
-User-agent: *
-Disallow:
+# There is no search benefit to any AI models scraping sites - all they do is steal content for
+# their own profit, attribution free, which leads to them serving our content without ever sending
+# users to us.
+# Reference: https://neil-clarke.com/block-the-bots-that-feed-ai-models-by-scraping-your-website/
+# See: https://github.com/MattWilcox/native-base/blob/45f6e7a837104f5ad83a5c7e280fb9a4eb126219/robots.txt
 
-# Add additional rules as needed
-# Example: Disallow: /private/
+User-agent: CCBot
+User-agent: ChatGPT-User
+User-agent: GPTBot
+User-agent: Google-Extended
+User-agent: Omgilibot
+User-agent: Omgili
+User-agent: FacebookBot
+User-agent: Applebot-Extended
+User-agent: anthropic-ai
+User-agent: ClaudeBot
+User-agent: Diffbot
+User-agent: Bytespider
+User-agent: ImagesiftBot
+User-agent: PerplexityBot
+User-agent: cohere-ai
+Disallow: /
\ No newline at end of file