通过LLM从页面中提取结构化数据
from firecrawl import FirecrawlApp
from pydantic import BaseModel, Field

# Initialize FirecrawlApp with your API key
app = FirecrawlApp(api_key='your_api_key')


# Schema describing the fields to extract from the page.
# NOTE: kept as comments rather than a class docstring so the generated
# JSON schema sent in the request is unchanged.
class ExtractSchema(BaseModel):
    company_mission: str
    supports_sso: bool
    is_open_source: bool
    is_in_yc: bool


# Request the 'json' format and pass the model's JSON schema via jsonOptions.
data = app.scrape_url('https://docs.firecrawl.dev/', {
    'formats': ['json'],
    'jsonOptions': {
        'schema': ExtractSchema.model_json_schema(),
    }
})

# Print the 'json' entry of the scrape result.
print(data["json"])
{ "success": true, "data": { "json": { "company_mission": "训练一个安全的人工智能,利用您的技术资源回答客户和员工的问题,这样您的团队就不必这样做了", "supports_sso": true, "is_open_source": false, "is_in_yc": true }, "metadata": { "title": "Mendable", "description": "Mendable让您轻松构建AI聊天应用。摄取、定制,然后只需一行代码即可在您想要的任何地方部署。由SideGuide提供支持", "robots": "follow, index", "ogTitle": "Mendable", "ogDescription": "Mendable让您轻松构建AI聊天应用。摄取、定制,然后只需一行代码即可在您想要的任何地方部署。由SideGuide提供支持", "ogUrl": "https://docs.firecrawl.dev/", "ogImage": "https://docs.firecrawl.dev/mendable_new_og1.png", "ogLocaleAlternate": [], "ogSiteName": "Mendable", "sourceURL": "https://docs.firecrawl.dev/" } } }
prompt
# Scrape the page and extract JSON using only a prompt (no schema supplied).
curl -X POST https://api.firecrawl.dev/v1/scrape \
    -H 'Content-Type: application/json' \
    -H 'Authorization: Bearer YOUR_API_KEY' \
    -d '{ "url": "https://docs.firecrawl.dev/", "formats": ["json"], "jsonOptions": { "prompt": "从页面中提取公司使命。" } }'
{ "success": true, "data": { "json": { "company_mission": "训练一个安全的人工智能,使用您的技术资源回答客户和员工的问题,以便您的团队不必这样做" }, "metadata": { "title": "Mendable", "description": "Mendable 允许您轻松构建 AI 聊天应用程序。摄取、自定义,然后只需一行代码即可在您想要的任何位置部署。由 SideGuide 提供支持", "robots": "follow, index", "ogTitle": "Mendable", "ogDescription": "Mendable 允许您轻松构建 AI 聊天应用程序。摄取、自定义,然后只需一行代码即可在您想要的任何位置部署。由 SideGuide 提供支持", "ogUrl": "https://docs.firecrawl.dev/", "ogImage": "https://docs.firecrawl.dev/mendable_new_og1.png", "ogLocaleAlternate": [], "ogSiteName": "Mendable", "sourceURL": "https://docs.firecrawl.dev/" } } }
extract
schema
systemPrompt