> ## Documentation Index
> Fetch the complete documentation index at: https://firecrawl.web3doc.top/llms.txt
> Use this file to discover all available pages before exploring further.

# 抓取


## OpenAPI

````yaml v1-openapi POST /crawl
openapi: 3.0.0
info:
  title: Firecrawl API
  version: v1
  description: >-
    API for interacting with Firecrawl services to perform web scraping and
    crawling tasks.
  contact:
    name: Firecrawl Support
    url: https://firecrawl.dev/support
    email: support@firecrawl.dev
servers:
  - url: https://api.firecrawl.dev/v1
security:
  - bearerAuth: []
paths:
  /crawl:
    post:
      tags:
        - Crawling
      summary: Crawl multiple URLs based on options
      operationId: crawlUrls
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                url:
                  type: string
                  format: uri
                  description: The base URL to start crawling from
                excludePaths:
                  type: array
                  items:
                    type: string
                  description: >-
                    URL pathname regex patterns that exclude matching URLs from
                    the crawl. For example, if you set "excludePaths":
                    ["blog/.*"] for the base URL firecrawl.dev, any results
                    matching that pattern will be excluded, such as
                    https://www.firecrawl.dev/blog/firecrawl-launch-week-1-recap.
                includePaths:
                  type: array
                  items:
                    type: string
                  description: >-
                    URL pathname regex patterns that include matching URLs in
                    the crawl. Only the paths that match the specified patterns
                    will be included in the response. For example, if you set
                    "includePaths": ["blog/.*"] for the base URL firecrawl.dev,
                    only results matching that pattern will be included, such as
                    https://www.firecrawl.dev/blog/firecrawl-launch-week-1-recap.
                maxDepth:
                  type: integer
                  description: >-
                    Maximum depth to crawl relative to the base URL. Basically,
                    the max number of slashes the pathname of a scraped URL may
                    contain.
                  default: 10
                maxDiscoveryDepth:
                  type: integer
                  description: >-
                    Maximum depth to crawl based on discovery order. The root
                    site and sitemapped pages have a discovery depth of 0. For
                    example, if you set it to 1, and you set ignoreSitemap, you
                    will only crawl the entered URL and all URLs that are linked
                    on that page.
                ignoreSitemap:
                  type: boolean
                  description: Ignore the website sitemap when crawling
                  default: false
                ignoreQueryParameters:
                  type: boolean
                  description: >-
                    Do not re-scrape the same path with different (or no)
                    query parameters
                  default: false
                limit:
                  type: integer
                  description: Maximum number of pages to crawl. Default limit is 10000.
                  default: 10000
                allowBackwardLinks:
                  type: boolean
                  description: >-
                    Enables the crawler to navigate from a specific URL to
                    previously linked pages.
                  default: false
                allowExternalLinks:
                  type: boolean
                  description: Allows the crawler to follow links to external websites.
                  default: false
                # Optional webhook configuration for the crawl. The property
                # itself is not required by the request body, but when it is
                # supplied the nested `url` is mandatory (see `required` below).
                webhook:
                  type: object
                  description: A webhook specification object.
                  properties:
                    url:
                      type: string
                      # Declared as a URI, matching the other `url` fields in
                      # this spec, so tooling can validate the value.
                      format: uri
                      description: >-
                        The URL to send the webhook to. This will trigger for
                        crawl started (crawl.started), every page crawled
                        (crawl.page) and when the crawl is completed
                        (crawl.completed or crawl.failed). The response will be
                        the same as the `/scrape` endpoint.
                    # Free-form header map; every header value must be a string.
                    headers:
                      type: object
                      description: Headers to send to the webhook URL.
                      additionalProperties:
                        type: string
                    # Free-form object; values of any type are accepted.
                    metadata:
                      type: object
                      description: >-
                        Custom metadata that will be included in all webhook
                        payloads for this crawl
                      additionalProperties: true
                    # The enum values are the suffixes of the `crawl.*` event
                    # names listed in the `url` description above.
                    events:
                      type: array
                      description: >-
                        Type of events that should be sent to the webhook URL.
                        (default: all)
                      items:
                        type: string
                        enum:
                          - completed
                          - page
                          - failed
                          - started
                  required:
                    - url
                scrapeOptions:
                  $ref: '#/components/schemas/ScrapeOptions'
              required:
                - url
      # Responses for POST /crawl. The three error responses below (402, 429,
      # 500) all carry the same body shape: a JSON object with a single string
      # `error` field; only the example message differs.
      responses:
        # Success: the body follows the shared CrawlResponse component schema.
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CrawlResponse'
        '402':
          description: Payment required
          content:
            application/json:
              schema:
                type: object
                properties:
                  error:
                    type: string
                    example: Payment required to access this resource.
        '429':
          description: Too many requests
          content:
            application/json:
              schema:
                type: object
                properties:
                  error:
                    type: string
                    example: >-
                      Request rate limit exceeded. Please wait and try again
                      later.
        '500':
          description: Server error
          content:
            application/json:
              schema:
                type: object
                properties:
                  error:
                    type: string
                    example: An unexpected error occurred on the server.
      security:
        - bearerAuth: []
components:
  schemas:
    ScrapeOptions:
      type: object
      properties:
        formats:
          type: array
          items:
            type: string
            enum:
              - markdown
              - html
              - rawHtml
              - links
              - screenshot
              - screenshot@fullPage
              - json
          description: Formats to include in the output.
          default:
            - markdown
        onlyMainContent:
          type: boolean
          description: >-
            Only return the main content of the page excluding headers, navs,
            footers, etc.
          default: true
        includeTags:
          type: array
          items:
            type: string
          description: Tags to include in the output.
        excludeTags:
          type: array
          items:
            type: string
          description: Tags to exclude from the output.
        headers:
          type: object
          description: >-
            Headers to send with the request. Can be used to send cookies,
            user-agent, etc.
        waitFor:
          type: integer
          description: >-
            Specify a delay in milliseconds before fetching the content,
            allowing the page sufficient time to load.
          default: 0
        mobile:
          type: boolean
          description: >-
            Set to true if you want to emulate scraping from a mobile device.
            Useful for testing responsive pages and taking mobile screenshots.
          default: false
        skipTlsVerification:
          type: boolean
          description: Skip TLS certificate verification when making requests
          default: false
        timeout:
          type: integer
          description: Timeout in milliseconds for the request
          default: 30000
        jsonOptions:
          type: object
          description: Options for JSON extraction (an optional schema, system prompt, and prompt).
          properties:
            schema:
              type: object
              description: The schema to use for the extraction (Optional)
            systemPrompt:
              type: string
              description: The system prompt to use for the extraction (Optional)
            prompt:
              type: string
              description: The prompt to use for the extraction without a schema (Optional)
        actions:
          type: array
          description: Actions to perform on the page before grabbing the content
          items:
            oneOf:
              - type: object
                title: Wait
                properties:
                  type:
                    type: string
                    enum:
                      - wait
                    description: Wait for a specified amount of milliseconds
                  milliseconds:
                    type: integer
                    minimum: 1
                    description: Number of milliseconds to wait
                  selector:
                    type: string
                    description: Query selector to find the element by
                    example: '#my-element'
                required:
                  - type
              - type: object
                title: Screenshot
                properties:
                  type:
                    type: string
                    enum:
                      - screenshot
                    description: >-
                      Take a screenshot. The links will be in the response's
                      `actions.screenshots` array.
                  fullPage:
                    type: boolean
                    description: Should the screenshot be full-page or viewport sized?
                    default: false
                required:
                  - type
              - type: object
                title: Click
                properties:
                  type:
                    type: string
                    enum:
                      - click
                    description: Click on an element
                  selector:
                    type: string
                    description: Query selector to find the element by
                    example: '#load-more-button'
                required:
                  - type
                  - selector
              - type: object
                title: Write text
                properties:
                  type:
                    type: string
                    enum:
                      - write
                    description: >-
                      Write text into an input field, text area, or
                      contenteditable element. Note: You must first focus the
                      element using a 'click' action before writing. The text
                      will be typed character by character to simulate keyboard
                      input.
                  text:
                    type: string
                    description: Text to type
                    example: Hello, world!
                required:
                  - type
                  - text
              - type: object
                title: Press a key
                description: >-
                  Press a key on the page. See
                  https://asawicki.info/nosense/doc/devices/keyboard/key_codes.html
                  for key codes.
                properties:
                  type:
                    type: string
                    enum:
                      - press
                    description: Press a key on the page
                  key:
                    type: string
                    description: Key to press
                    example: Enter
                required:
                  - type
                  - key
              - type: object
                title: Scroll
                properties:
                  type:
                    type: string
                    enum:
                      - scroll
                    description: Scroll the page or a specific element
                  direction:
                    type: string
                    enum:
                      - up
                      - down
                    description: Direction to scroll
                    default: down
                  selector:
                    type: string
                    description: Query selector for the element to scroll
                    example: '#my-element'
                required:
                  - type
              - type: object
                title: Scrape
                properties:
                  type:
                    type: string
                    enum:
                      - scrape
                    description: >-
                      Scrape the current page content, returns the url and the
                      html.
                required:
                  - type
              - type: object
                title: Execute JavaScript
                properties:
                  type:
                    type: string
                    enum:
                      - executeJavascript
                    description: Execute JavaScript code on the page
                  script:
                    type: string
                    description: JavaScript code to execute
                    example: document.querySelector('.button').click();
                required:
                  - type
                  - script
        location:
          type: object
          description: >-
            Location settings for the request. When specified, this will use an
            appropriate proxy if available and emulate the corresponding
            language and timezone settings. Defaults to 'US' if not specified.
          properties:
            country:
              type: string
              description: ISO 3166-1 alpha-2 country code (e.g., 'US', 'AU', 'DE', 'JP')
              pattern: ^[A-Z]{2}$
              default: US
            languages:
              type: array
              description: >-
                Preferred languages and locales for the request in order of
                priority. Defaults to the language of the specified location.
                See
                https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language
              items:
                type: string
                example: en-US
        removeBase64Images:
          type: boolean
          description: >-
            Removes all base 64 images from the output, which may be
            overwhelmingly long. The image's alt text remains in the output, but
            the URL is replaced with a placeholder.
        blockAds:
          type: boolean
          description: Enables ad-blocking and cookie popup blocking.
          default: true
        proxy:
          type: string
          enum:
            - basic
            - stealth
          description: >-
            Specifies the type of proxy to use.

             - **basic**: Proxies for scraping sites with none to basic anti-bot solutions. Fast and usually works.
             - **stealth**: Stealth proxies for scraping sites with advanced anti-bot solutions. Slower, but more reliable on certain sites.

            If you do not specify a proxy, Firecrawl will automatically attempt
            to determine which one you need based on the target site.
    # Body of the 200 response for POST /crawl. None of the properties are
    # marked as required in this schema.
    CrawlResponse:
      type: object
      properties:
        success:
          type: boolean
        # NOTE(review): presumably the identifier of the started crawl job;
        # the spec gives no description, so confirm.
        id:
          type: string
        # NOTE(review): the spec does not say what this URL points to;
        # confirm before relying on it.
        url:
          type: string
          format: uri
  # HTTP Bearer authentication. `bearerAuth` is the scheme name referenced by
  # the `security` requirements in this spec (top level and on POST /crawl).
  securitySchemes:
    bearerAuth:
      type: http
      scheme: bearer

````