# robots.txt for MockAddress

# Default rules for all search-engine crawlers.
User-agent: *
# Allow access to llms.txt (AI assistant documentation).
Allow: /llms.txt
# Block saved-address pages (user personal data), all locales.
Disallow: /saved-addresses/
Disallow: /en/saved-addresses/
Disallow: /ru/saved-addresses/
Disallow: /es/saved-addresses/
Disallow: /pt/saved-addresses/

# AI-specific crawlers, allowed to fetch llms.txt.
# NOTE: per RFC 9309 a crawler obeys only the most specific matching group
# and ignores the "*" group entirely, so these groups MUST repeat the
# Disallow rules above — an Allow-only group would otherwise permit these
# bots to crawl everything, including the saved-address pages.
# Consecutive User-agent lines below share the single rule set that follows.
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: Claude-Web
User-agent: anthropic-ai
User-agent: Google-Extended
User-agent: Gemini
User-agent: CCBot
# Chinese AI crawlers (LLM training).
User-agent: Bytespider
User-agent: DoubaoBot
User-agent: BaiduSpider
User-agent: QwenBot
User-agent: AlibabaBot
# Other AI training crawlers.
User-agent: Applebot-Extended
User-agent: PerplexityBot
Allow: /llms.txt
Disallow: /saved-addresses/
Disallow: /en/saved-addresses/
Disallow: /ru/saved-addresses/
Disallow: /es/saved-addresses/
Disallow: /pt/saved-addresses/

# Sitemap locations (Sitemap lines are group-independent).
# Main index file (recommended; automatically includes all child sitemaps).
Sitemap: https://mockaddress.com/sitemap.xml
# Per-language sitemaps (optional, but recommended to list them all).
Sitemap: https://mockaddress.com/sitemap-zh.xml
Sitemap: https://mockaddress.com/sitemap-en.xml
Sitemap: https://mockaddress.com/sitemap-ru.xml
Sitemap: https://mockaddress.com/sitemap-es.xml
Sitemap: https://mockaddress.com/sitemap-pt.xml