From 6bddf61c04fef0b134ad88bfd9a21a1b51160a6f Mon Sep 17 00:00:00 2001
From: svemagie <869694+svemagie@users.noreply.github.com>
Date: Thu, 19 Mar 2026 14:59:41 +0100
Subject: [PATCH] chore: block AI scrapers in robots.txt

Block known AI training bots (GPTBot, ClaudeBot, CCBot, etc.)
from crawling the site.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 robots.txt | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/robots.txt b/robots.txt
index 668f96b..3b3624b 100644
--- a/robots.txt
+++ b/robots.txt
@@ -1,5 +1,22 @@
-User-agent: *
-Disallow:
+# There is no search benefit to letting AI models scrape sites - all they do is steal content for
+# their own profit, attribution-free, and then serve our content without ever sending
+# users to us.
+# Reference: https://neil-clarke.com/block-the-bots-that-feed-ai-models-by-scraping-your-website/
+# See: https://github.com/MattWilcox/native-base/blob/45f6e7a837104f5ad83a5c7e280fb9a4eb126219/robots.txt
 
-# Add additional rules as needed
-# Example: Disallow: /private/
+User-agent: CCBot
+User-agent: ChatGPT-User
+User-agent: GPTBot
+User-agent: Google-Extended
+User-agent: Omgilibot
+User-agent: Omgili
+User-agent: FacebookBot
+User-agent: Applebot-Extended
+User-agent: anthropic-ai
+User-agent: ClaudeBot
+User-agent: Diffbot
+User-agent: Bytespider
+User-agent: ImagesiftBot
+User-agent: PerplexityBot
+User-agent: cohere-ai
+Disallow: /
\ No newline at end of file