feat: poison ai crawlers

This commit is contained in:
Leyla Becker 2026-02-11 19:24:18 -06:00
parent 804cafad27
commit 16544f1993
4 changed files with 235 additions and 134 deletions

132
_data/aiBots.json Normal file
View file

@@ -0,0 +1,132 @@
[
"AddSearchBot",
"AI2Bot",
"AI2Bot-DeepResearchEval",
"Ai2Bot-Dolma",
"aiHitBot",
"amazon-kendra",
"Amazonbot",
"AmazonBuyForMe",
"Andibot",
"Anomura",
"anthropic-ai",
"Applebot",
"Applebot-Extended",
"atlassian-bot",
"Awario",
"bedrockbot",
"bigsur.ai",
"Bravebot",
"Brightbot 1.0",
"BuddyBot",
"Bytespider",
"CCBot",
"Channel3Bot",
"ChatGLM-Spider",
"ChatGPT Agent",
"ChatGPT-User",
"Claude-SearchBot",
"Claude-User",
"Claude-Web",
"ClaudeBot",
"Cloudflare-AutoRAG",
"CloudVertexBot",
"cohere-ai",
"cohere-training-data-crawler",
"Cotoyogi",
"Crawl4AI",
"Crawlspace",
"Datenbank Crawler",
"DeepSeekBot",
"Devin",
"Diffbot",
"DuckAssistBot",
"Echobot Bot",
"EchoboxBot",
"FacebookBot",
"facebookexternalhit",
"Factset_spyderbot",
"FirecrawlAgent",
"FriendlyCrawler",
"Gemini-Deep-Research",
"Google-CloudVertexBot",
"Google-Extended",
"Google-Firebase",
"Google-NotebookLM",
"GoogleAgent-Mariner",
"GoogleOther",
"GoogleOther-Image",
"GoogleOther-Video",
"GPTBot",
"iAskBot",
"iaskspider",
"iaskspider/2.0",
"IbouBot",
"ICC-Crawler",
"ImagesiftBot",
"imageSpider",
"img2dataset",
"ISSCyberRiskCrawler",
"Kangaroo Bot",
"KlaviyoAIBot",
"KunatoCrawler",
"laion-huggingface-processor",
"LAIONDownloader",
"LCC",
"LinerBot",
"Linguee Bot",
"LinkupBot",
"Manus-User",
"meta-externalagent",
"Meta-ExternalAgent",
"meta-externalfetcher",
"Meta-ExternalFetcher",
"meta-webindexer",
"MistralAI-User",
"MistralAI-User/1.0",
"MyCentralAIScraperBot",
"netEstate Imprint Crawler",
"NotebookLM",
"NovaAct",
"OAI-SearchBot",
"omgili",
"omgilibot",
"OpenAI",
"Operator",
"PanguBot",
"Panscient",
"panscient.com",
"Perplexity-User",
"PerplexityBot",
"PetalBot",
"PhindBot",
"Poggio-Citations",
"Poseidon Research Crawler",
"QualifiedBot",
"QuillBot",
"quillbot.com",
"SBIntuitionsBot",
"Scrapy",
"SemrushBot-OCOB",
"SemrushBot-SWA",
"ShapBot",
"Sidetrade indexer bot",
"Spider",
"TavilyBot",
"TerraCotta",
"Thinkbot",
"TikTokSpider",
"Timpibot",
"TwinAgent",
"VelenPublicWebCrawler",
"WARDBot",
"Webzio-Extended",
"webzio-extended",
"wpbot",
"WRTNBot",
"YaK",
"YandexAdditional",
"YandexAdditionalBot",
"YouBot",
"ZanistaBot"
]