Expanding blocklist for robots.txt

This commit is contained in:
Awstin 2025-05-28 07:57:19 -04:00
parent d57145f548
commit 525a484e0a
2 changed files with 51 additions and 13 deletions

View file

@@ -1,31 +1,67 @@
User-agent: * User-agent: *
Disallow: Disallow:
User-agent: AdsBot-Google User-agent: AI2Bot
User-agent: Ai2Bot-Dolma
User-agent: aiHitBot
User-agent: AliyunSecBot
User-agent: AliyunSecBot/Aliyun
User-agent: AliyunSecBot/Nutch-1.21-SNAPSHOT
User-agent: Amazonbot User-agent: Amazonbot
User-agent: AndiBot
User-agent: anthropic-ai User-agent: anthropic-ai
User-agent: Applebot User-agent: Applebot-Extended
User-agent: AwarioRssBot User-agent: Brightbot
User-agent: AwarioSmartBot User-agent: Brightbot 1.0
User-agent: Bytespider User-agent: Bytespider
User-agent: CCBot User-agent: CCBot
User-agent: ChatGPT-User User-agent: ChatGPT-User
User-agent: ClaudeBot User-agent: ClaudeBot
User-agent: Claude-SearchBot
User-agent: Claude-User
User-agent: Claude-Web User-agent: Claude-Web
User-agent: cohere-ai User-agent: cohere-ai
User-agent: DataForSeoBot User-agent: cohere-training-data-crawler
User-agent: Cotoyogi
User-agent: Crawlspace
User-agent: Diffbot
User-agent: DuckAssistBot
User-agent: ExaBot
User-agent: FacebookBot User-agent: FacebookBot
User-agent: Factset_spyderbot
User-agent: firecrawlAgent
User-agent: Google-CloudVertexBot
User-agent: Google-Extended User-agent: Google-Extended
User-agent: GoogleOther User-agent: GoogleOther
User-agent: GoogleOther-Image
User-agent: GoogleOther-Video
User-agent: GPTBot User-agent: GPTBot
User-agent: iaskspider
User-agent: iaskspider/2.0
User-agent: ICC-Crawler
User-agent: ImagesiftBot User-agent: ImagesiftBot
User-agent: magpie-crawler User-agent: img2dataset
User-agent: Meltwater User-agent: ISSCyberRiskCrawler
User-agent: Kangaroo Bot
User-agent: Meta-ExternalAgent
User-agent: Meta-ExternalFetcher
User-agent: MistralAI-User/1.0
User-agent: NovaAct
User-agent: OAI-SearchBot
User-agent: omgili User-agent: omgili
User-agent: omgilibot User-agent: omgilibot
User-agent: peer39_crawler User-agent: Operator
User-agent: peer39_crawler/1.0 User-agent: PanguBot
User-agent: PerplexityBot User-agent: PerplexityBot
User-agent: Seekr User-agent: Perplexity-User
User-agent: YouBot User-agent: PetalBot
User-agent: PhindBot
User-agent: QualifiedBot
User-agent: SemrushBot-OCOB
User-agent: SemrushBot-SWA
User-agent: Sidetrade indexer bot
User-agent: TikTokSpider
User-agent: Timpibot
User-agent: VelenPublicWebCrawler
User-agent: wpbot
Disallow: / Disallow: /

View file

@@ -1,7 +1,8 @@
use achubb_website::{run_load, run_server, database::establish_connection}; use achubb_website::{database::establish_connection, run_load, run_server};
use clap::Parser; use clap::Parser;
use std::error::Error; use dotenv::dotenv;
use sqlx::postgres::PgPool; use sqlx::postgres::PgPool;
use std::error::Error;
#[derive(Parser, Debug)] #[derive(Parser, Debug)]
#[command(version, about, long_about = None)] #[command(version, about, long_about = None)]
@@ -15,6 +16,7 @@ struct Args {
#[tokio::main] #[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> { async fn main() -> Result<(), Box<dyn Error>> {
dotenv().ok();
let args = Args::parse(); let args = Args::parse();
let pool: PgPool = match establish_connection().await { let pool: PgPool = match establish_connection().await {