# robots.txt — sergiolepone.com
# Active policy: SCENARIO B
#   - Allow real-time grounding (GEO visibility in LLMs)
#   - Block training bots (protect editorial content)
# Last reviewed: 2026-04-30
#
# Format notes:
#   - robots.txt is line-oriented: exactly one "field: value" record per line.
#     Everything after a "#" up to the end of the line is a comment, so
#     directives must never share a line with a comment header.
#   - A crawler obeys only the most specific group that matches its
#     user-agent token. Bots that have their own group below therefore do NOT
#     inherit the rules of the "User-agent: *" group at the bottom.

# ============================================================
# TRADITIONAL SEARCH ENGINES — FULL ACCESS
# ============================================================
User-agent: Googlebot
Allow: /

User-agent: Googlebot-Image
Allow: /

User-agent: Googlebot-News
Allow: /

User-agent: Bingbot
Allow: /

User-agent: DuckDuckBot
Allow: /

User-agent: Yandex
Allow: /

User-agent: Applebot
Allow: /

# ============================================================
# AI BOTS — REAL-TIME GROUNDING: ALLOWED
# (they cite sources to the user; they do not train a model on the content)
# ============================================================

# OpenAI — search in ChatGPT and SearchGPT
User-agent: OAI-SearchBot
Allow: /

User-agent: ChatGPT-User
Allow: /

# Anthropic — Claude answers with grounded citations
User-agent: Claude-User
Allow: /

User-agent: Claude-SearchBot
Allow: /

# Perplexity — GEO-friendly search engine
User-agent: PerplexityBot
Allow: /

User-agent: Perplexity-User
Allow: /

# Mistral — user-facing agent (the Mistral crawler is blocked further down)
User-agent: MistralAI-User
Allow: /

# You.com
User-agent: YouBot
Allow: /

# ============================================================
# AI BOTS — MODEL TRAINING: BLOCKED
# ============================================================

# OpenAI training scraper
User-agent: GPTBot
Disallow: /

# Anthropic training crawler
User-agent: ClaudeBot
Disallow: /

User-agent: anthropic-ai
Disallow: /

# Common Crawl (feeds multiple LLMs)
User-agent: CCBot
Disallow: /

# Google training (Gemini, Vertex AI)
User-agent: Google-Extended
Disallow: /

# Apple training (Apple Intelligence)
User-agent: Applebot-Extended
Disallow: /

# ByteDance / Doubao
User-agent: Bytespider
Disallow: /

# Meta training
User-agent: FacebookBot
Disallow: /

User-agent: Meta-ExternalAgent
Disallow: /

# Amazon training
User-agent: Amazonbot
Disallow: /

# Diffbot commercial scraping
User-agent: Diffbot
Disallow: /

# Cohere scraping
User-agent: cohere-ai
Disallow: /

# AI2 / AllenAI
User-agent: AI2Bot
Disallow: /

# DeepSeek
User-agent: DeepSeekBot
Disallow: /

# Mistral crawler
User-agent: MistralAI-Crawler
Disallow: /

# Generic AI bots
User-agent: ImagesiftBot
Disallow: /

User-agent: PetalBot
Disallow: /

User-agent: Omgili
Disallow: /

User-agent: omgilibot
Disallow: /

# ============================================================
# GLOBAL RULES FOR EVERYONE ELSE
# ============================================================
# Applies only to crawlers that have no dedicated group above.
# The longer (more specific) Disallow paths take precedence over "Allow: /".
User-agent: *
Disallow: /buscar/
Disallow: /api/
Disallow: /_assets/
Allow: /

# Sitemap (independent of any user-agent group)
Sitemap: https://sergiolepone.com/sitemap-index.xml

# ============================================================
# ALTERNATIVE SCENARIOS (commented out, ready to activate)
# ============================================================

# --- SCENARIO A: Fully open (training OK) ---
# User-agent: *
# Allow: /
# Sitemap: https://sergiolepone.com/sitemap-index.xml

# --- SCENARIO C: Block all AI ---
# User-agent: GPTBot
# User-agent: ChatGPT-User
# User-agent: OAI-SearchBot
# User-agent: ClaudeBot
# User-agent: Claude-User
# User-agent: Claude-SearchBot
# User-agent: anthropic-ai
# User-agent: PerplexityBot
# User-agent: Perplexity-User
# User-agent: CCBot
# User-agent: Google-Extended
# User-agent: Applebot-Extended
# User-agent: Bytespider
# User-agent: FacebookBot
# User-agent: Meta-ExternalAgent
# User-agent: Amazonbot
# User-agent: Diffbot
# User-agent: cohere-ai
# User-agent: AI2Bot
# User-agent: DeepSeekBot
# User-agent: MistralAI-User
# User-agent: MistralAI-Crawler
# Disallow: /