从任何网站生成 LLMs.txt 文件,用于 LLM 训练和分析
/llmstxt
llms.txt
llms-full.txt
from firecrawl import FirecrawlApp

# Initialize the client.
firecrawl = FirecrawlApp(api_key="your_api_key")

# Generation parameters.
params = {
    "maxUrls": 2,  # maximum number of URLs to analyze
    "showFullText": True,  # include the full text in the result
}

# Generate LLMs.txt using polling.
results = firecrawl.generate_llms_text(
    url="https://example.com",
    params=params,
)

# Report a failure first; otherwise show the generated result.
if not results['success']:
    print(f"错误: {results.get('error', '未知错误')}")
else:
    print(f"状态: {results['status']}")
    print(f"生成的数据: {results['data']}")
from firecrawl import FirecrawlApp

# Initialize the client.
firecrawl = FirecrawlApp(api_key="your_api_key")

# Create the asynchronous generation job.
job = firecrawl.async_generate_llms_text(
    url="https://example.com",
)

if job['success']:
    job_id = job['id']

    # Check the LLMs.txt generation status of the job created above.
    # The job_id variable must be passed here, not the literal string
    # "job_id", otherwise the lookup never refers to the real job.
    status = firecrawl.check_generate_llms_text_status(job_id)

    # Print the current status.
    print(f"Status: {status['status']}")

    if status['status'] == 'completed':
        data = status['data']
        print("LLMs.txt 内容:", data['llmstxt'])
        # Optional fields are guarded: the full text is only looked up when
        # present, and the sample responses do not show 'processedUrls'.
        if 'llmsfulltxt' in data:
            print("完整文本内容:", data['llmsfulltxt'])
        if 'processedUrls' in data:
            print(f"处理过的 URLs: {len(data['processedUrls'])}")
else:
    # Job creation failed, so there is no job id to poll; report the error
    # the same way the synchronous example does.
    print(f"错误: {job.get('error', '未知错误')}")
{ "success": true, "data": { "llmstxt": "# Firecrawl.dev llms.txt - [Web Data Extraction Tool](https://www.firecrawl.dev/)...", "llmsfulltxt": "# Firecrawl.dev llms-full.txt " }, "status": "processing", "expiresAt": "2025-03-03T23:19:18.000Z" }
{ "success": true, "data": { "llmstxt": "# http://firecrawl.dev llms.txt - [Web数据提取工具](https://www.firecrawl.dev/): 轻松将网站转换为干净的、适用于LLM的数据。 - [灵活的网页抓取定价](https://www.firecrawl.dev/pricing): 提供灵活的网页抓取和数据提取定价计划。 - [网页抓取与人工智能](https://www.firecrawl.dev/blog): 浏览有关网页抓取和人工智能的教程和文章...", "llmsfulltxt": "# http://firecrawl.dev llms-full.txt ## Web数据提取工具 介绍 /extract - 通过提示获取网页数据 [立即尝试](https://www.firecrawl.dev/extract) [💥购买年度计划可享受2个月免费](https://www.firecrawl.dev/pricing)..." }, "status": "completed", "expiresAt": "2025-03-03T22:45:50.000Z" }
maxUrls