# As a condition of accessing this website, you agree to abide by the following
# content signals:

# (a)  If a Content-Signal = yes, you may collect content for the corresponding
#      use.
# (b)  If a Content-Signal = no, you may not collect content for the
#      corresponding use.
# (c)  If the website operator does not include a Content-Signal for a
#      corresponding use, the website operator neither grants nor restricts
#      permission via Content-Signal with respect to the corresponding use.

# The content signals and their meanings are:

# search:   building a search index and providing search results (e.g., returning
#           hyperlinks and short excerpts from your website's contents). Search does not
#           include providing AI-generated search summaries.
# ai-input: inputting content into one or more AI models (e.g., retrieval
#           augmented generation, grounding, or other real-time taking of content for
#           generative AI search answers).
# ai-train: training or fine-tuning AI models.

# ANY RESTRICTIONS EXPRESSED VIA CONTENT SIGNALS ARE EXPRESS RESERVATIONS OF
# RIGHTS UNDER ARTICLE 4 OF THE EUROPEAN UNION DIRECTIVE 2019/790 ON COPYRIGHT
# AND RELATED RIGHTS IN THE DIGITAL SINGLE MARKET.

# BEGIN Cloudflare Managed content

# NOTE(review): the groups below sit between the "BEGIN/END Cloudflare
# Managed" markers, so they are presumably generated by Cloudflare and
# hand edits here may be overwritten; confirm before changing them.
# They conflict with the site-owned policy later in this file:
#   - Applebot-Extended, ClaudeBot, Google-Extended and GPTBot get
#     "Disallow: /" here but "Allow: /" further down.
#   - "ai-train=no" here contradicts the later statement that the site
#     wants to be in training data.
# Reconcile the two policies where this section is configured.
User-agent: *
Content-Signal: search=yes,ai-train=no
Allow: /

User-agent: Amazonbot
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: CloudflareBrowserRenderingCrawler
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: meta-externalagent
Disallow: /

# END Cloudflare Managed Content

# Vidai marketing site — crawler policy
# =====================================================================
# Policy summary:
#   - All general search engines: allow everything (default).
#   - All known AI answer engines (ChatGPT, Claude, Perplexity, Google
#     AI Overviews via Googlebot, Gemini via Google-Extended, etc.):
#     allow everything. Vidai content is intended to be cited; we want
#     LLMs reading it.
#   - "Quiet" training-only crawlers with no end-user benefit
#     (Common Crawl's CCBot, Bytespider, Meta's FacebookBot and
#     Meta-ExternalAgent, Amazonbot): disallow. These add load and
#     feed someone else's model without ever surfacing us.
#
# Last updated: 2026-05-26.
# Maintained alongside /llms.txt and the AEO baseline at
# gtm/aeo-baseline.md. If a new AI engine launches with a documented
# user-agent, add it here and to llms.txt.

# ─────────────────────────────────────────────────────────────────────
# Default — all crawlers
# ─────────────────────────────────────────────────────────────────────
# NOTE(review): this is the second "User-agent: *" group in the file;
# the first one (Cloudflare-managed section above) also carries
# "Allow: /" plus a Content-Signal line. RFC 9309 parsers merge groups
# for the same agent, so this group is redundant for them, and parsers
# that stop at the first matching group never read it. It is kept so
# the default stays explicit if the managed section is ever removed.
User-agent: *
Allow: /

# ─────────────────────────────────────────────────────────────────────
# AI answer engines (named, explicit allow — defends against any
# future restrictive default)
# ─────────────────────────────────────────────────────────────────────

# OpenAI / ChatGPT — three documented user-agents:
#   - OAI-SearchBot: powers ChatGPT search results (the cited answers)
#   - ChatGPT-User: real-time fetches triggered by a user action in
#     ChatGPT
#   - GPTBot: training crawler
# All allowed; we want to be cited AND in training data.
# NOTE(review): the Cloudflare-managed section earlier in this file
# sets "Disallow: /" for GPTBot. RFC 9309 parsers merge both groups and
# prefer Allow on an equal-length match, but a crawler that stops at
# the first matching group will only see the Disallow. Resolve the
# conflict in the managed section's configuration.
User-agent: OAI-SearchBot
Allow: /

User-agent: ChatGPT-User
Allow: /

User-agent: GPTBot
Allow: /

# Anthropic / Claude — three documented user-agents:
#   - ClaudeBot: training crawler
#   - Claude-User: real-time fetches when a user invokes browse
#   - Claude-SearchBot: crawler that indexes content for Claude's
#     search results
# NOTE(review): the Cloudflare-managed section earlier in this file
# sets "Disallow: /" for ClaudeBot. RFC 9309 parsers merge both groups
# and prefer Allow on an equal-length match, but a crawler that stops
# at the first matching group will only see the Disallow. Resolve the
# conflict in the managed section's configuration.
User-agent: ClaudeBot
Allow: /

User-agent: Claude-User
Allow: /

User-agent: Claude-SearchBot
Allow: /

# Perplexity — answer engine; cites sources explicitly. Two documented
# user-agents:
#   - PerplexityBot: crawler that indexes pages for Perplexity results
#   - Perplexity-User: fetches made on behalf of a user's question
User-agent: PerplexityBot
Allow: /

User-agent: Perplexity-User
Allow: /

# Google — two tokens, both allowed:
#   - Googlebot: crawls for Google Search, which is also what feeds
#     AI Overviews.
#   - Google-Extended: a robots.txt control token (not a separate
#     crawler) governing whether crawled content may be used for
#     Gemini training and grounding. It does not affect inclusion in
#     Google Search.
# NOTE(review): the Cloudflare-managed section earlier in this file
# sets "Disallow: /" for Google-Extended, which conflicts with the
# Allow below; resolve it in the managed section's configuration.
User-agent: Google-Extended
Allow: /

User-agent: Googlebot
Allow: /

# Microsoft Bing / Copilot — Bingbot is the single user-agent listed
# here for both Bing search and Copilot answers.
User-agent: Bingbot
Allow: /

# Apple — two tokens, both allowed:
#   - Applebot: Apple's crawler (Siri, Spotlight, Safari search
#     features).
#   - Applebot-Extended: control token for whether Applebot-crawled
#     content may be used to train Apple's generative models
#     (Apple Intelligence).
# NOTE(review): the Cloudflare-managed section earlier in this file
# sets "Disallow: /" for Applebot-Extended, which conflicts with the
# Allow below; resolve it in the managed section's configuration.
User-agent: Applebot
Allow: /

User-agent: Applebot-Extended
Allow: /

# You.com — AI search engine crawler
User-agent: YouBot
Allow: /

# DuckDuckGo — search engine crawler
User-agent: DuckDuckBot
Allow: /

# ─────────────────────────────────────────────────────────────────────
# Disallowed — training-only crawlers with no end-user benefit
# ─────────────────────────────────────────────────────────────────────

# Common Crawl — bulk web archive. Many AI models train on it, but we
# already serve the named AI crawlers above directly, so Common Crawl
# adds duplicate load without unique benefit.
User-agent: CCBot
Disallow: /

# ByteDance / TikTok training crawler.
User-agent: Bytespider
Disallow: /

# Meta's AI training crawlers: FacebookBot and Meta-ExternalAgent.
# These are separate from FacebookExternalHit, which fetches OG cards
# for link previews; that one stays allowed via the default rule.
User-agent: FacebookBot
Disallow: /

User-agent: Meta-ExternalAgent
Disallow: /

# Amazon's crawler, blocked here as a bulk training crawler.
# NOTE(review): confirm against Amazon's crawler documentation that
# blocking Amazonbot does not also remove us from Alexa answers.
User-agent: Amazonbot
Disallow: /

# ─────────────────────────────────────────────────────────────────────
# Sitemap
# ─────────────────────────────────────────────────────────────────────
# Absolute URL of the sitemap index. This line is not part of any
# User-agent group above.
Sitemap: https://vidai.uk/sitemap-index.xml