> ## Documentation Index
> Fetch the complete documentation index at: https://gcore.com/docs/llms.txt
> Use this file to discover all available pages before exploring further.

# Check inference deployment quota

> Check whether the global quota is exceeded. If it is, the number of additional
> quotas needed to create the specified inference deployment is calculated.


## OpenAPI

````yaml /api-reference/services_docs_mintlify/cloud_api.yaml post /cloud/v3/inference/{project_id}/deployments/check_limits
openapi: 3.1.0
info:
  title: Gcore OpenAPI – Cloud API
  description: >-
    This OpenAPI is an aggregated OpenAPI specification that unifies all Gcore
    products into a single file. It covers Cloud, CDN, DNS, WAAP, DDoS
    Protection, Object Storage, Streaming, and FastEdge services.
  # Quoted on purpose: this hash-like identifier must stay a string even if a
  # future value is all digits or resembles a float (e.g. 12345e67).
  version: '2606518da447'
servers:
  - url: https://api.gcore.com
# Default security requirement for every operation: the `APIKey` scheme
# declared under components.securitySchemes.
security:
  - APIKey: []
# Tag catalogue of the aggregated spec. The single operation shown under
# `paths` below is tagged `Everywhere Inference`.
tags:
  - name: Bare Metal
  - name: Container as a Service
  - name: Cost Reports
  - name: DDoS Protection
  - name: Everywhere Inference
  - name: Everywhere Inference Apps
  - name: File Shares
  - name: Floating IPs
  - name: Function as a Service
  - name: GPU Bare Metal
  - name: GPU Virtual
  - name: IP Ranges
  - name: Images
  - name: Instances
  - name: Load Balancers
  - name: Logging
  - name: Managed Kubernetes
  - name: Managed PostgreSQL
  - name: Networks
  - name: Placement Groups
  - name: Ports
  - name: Projects
  - name: Quotas
  - name: Regions
  - name: Registry
  - name: Reservations
  - name: Reserved IPs
  - name: Routers
  - name: SSH Keys
  - name: Secrets
  - name: Security Groups
  - name: Snapshot Schedules
  - name: Snapshots
  - name: Tasks
  - name: User Actions
  - name: User Role Assignments
  - name: Volumes
paths:
  # Quota pre-check for an inference deployment described by the request body.
  /cloud/v3/inference/{project_id}/deployments/check_limits:
    post:
      tags:
        - Everywhere Inference
      summary: Check inference deployment quota
      # Folded scalar kept as one paragraph: a blank line inside `>-` would
      # insert a literal newline in the middle of the sentence.
      description: >-
        Check if global quota is exceeded, if yes the number of additional
        quotas needed to create the specified inference deployment will be
        calculated.
      operationId: InferenceCheckLimitsHandlerV3.post
      parameters:
        # The only parameter: the integer project ID taken from the URL path.
        - in: path
          name: project_id
          required: true
          description: Project ID
          schema:
            description: Project ID
            example: 1
            examples:
              - 1
            title: Project Id
            type: integer
      # NOTE(review): there is no `required: true` here, so the body is
      # formally optional even though the referenced schema requires
      # `flavor_name` and `containers` - confirm with the API owners.
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CheckQuotaBeforeInferenceCreationSerializer'
      # Only the success response is documented for this operation.
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/InferenceRequiredQuotaSerializer'
components:
  schemas:
    # Request body of the quota check: a flavor name plus a non-empty list of
    # containers. Both properties are required.
    CheckQuotaBeforeInferenceCreationSerializer:
      type: object
      title: CheckQuotaBeforeInferenceCreationSerializer
      required:
        - flavor_name
        - containers
      properties:
        containers:
          type: array
          title: Containers
          description: List of containers for the inference instance.
          minItems: 1
          items:
            $ref: '#/components/schemas/ContainerInSerializerV3'
          example:
            - region_id: 1
              scale:
                max: 3
                min: 1
          # `examples` is a list of example values; each value is itself a
          # list of containers, hence the nested sequence.
          examples:
            - - region_id: 1
                scale:
                  max: 3
                  min: 1
        flavor_name:
          type: string
          title: Flavor Name
          description: Inference flavor name.
          minLength: 1
          example: inference-16vcpu-232gib-1xh100-80gb
          examples:
            - inference-16vcpu-232gib-1xh100-80gb
    # Response of the quota check. Each resource (CPU millicores, A100 / H100 /
    # L40S GPUs, instances) is reported as three integer fields: `*_limit`,
    # `*_requested` and `*_usage`. No field is listed as required.
    InferenceRequiredQuotaSerializer:
      type: object
      title: InferenceRequiredQuotaSerializer
      properties:
        # --- CPU millicores ---
        inference_cpu_millicore_count_limit:
          type: integer
          title: Inference Cpu Millicore Count Limit
          description: Inference CPU millicore count limit
          example: 8000
          examples: [8000]
        inference_cpu_millicore_count_requested:
          type: integer
          title: Inference Cpu Millicore Count Requested
          description: Inference CPU millicore count requested
          example: 3000
          examples: [3000]
        inference_cpu_millicore_count_usage:
          type: integer
          title: Inference Cpu Millicore Count Usage
          description: Inference CPU millicore count usage
          example: 2000
          examples: [2000]
        # --- A100 GPUs ---
        inference_gpu_a100_count_limit:
          type: integer
          title: Inference Gpu A100 Count Limit
          description: Inference GPU A100 Count limit
          example: 4
          examples: [4]
        inference_gpu_a100_count_requested:
          type: integer
          title: Inference Gpu A100 Count Requested
          description: Inference GPU A100 Count requested
          example: 2
          examples: [2]
        inference_gpu_a100_count_usage:
          type: integer
          title: Inference Gpu A100 Count Usage
          description: Inference GPU A100 Count usage
          example: 1
          examples: [1]
        # --- H100 GPUs ---
        inference_gpu_h100_count_limit:
          type: integer
          title: Inference Gpu H100 Count Limit
          description: Inference GPU H100 Count limit
          example: 4
          examples: [4]
        inference_gpu_h100_count_requested:
          type: integer
          title: Inference Gpu H100 Count Requested
          description: Inference GPU H100 Count requested
          example: 2
          examples: [2]
        inference_gpu_h100_count_usage:
          type: integer
          title: Inference Gpu H100 Count Usage
          description: Inference GPU H100 Count usage
          example: 1
          examples: [1]
        # --- L40S GPUs ---
        inference_gpu_l40s_count_limit:
          type: integer
          title: Inference Gpu L40S Count Limit
          description: Inference GPU L40s Count limit
          example: 4
          examples: [4]
        inference_gpu_l40s_count_requested:
          type: integer
          title: Inference Gpu L40S Count Requested
          description: Inference GPU L40s Count requested
          example: 2
          examples: [2]
        inference_gpu_l40s_count_usage:
          type: integer
          title: Inference Gpu L40S Count Usage
          description: Inference GPU L40s Count usage
          example: 1
          examples: [1]
        # --- Inference instances ---
        inference_instance_count_limit:
          type: integer
          title: Inference Instance Count Limit
          description: Inference instance count limit
          example: 10
          examples: [10]
        inference_instance_count_requested:
          type: integer
          title: Inference Instance Count Requested
          description: Inference instance count requested
          example: 1
          examples: [1]
        inference_instance_count_usage:
          type: integer
          title: Inference Instance Count Usage
          description: Inference instance count usage
          example: 1
          examples: [1]
    # One container entry of the request: the target region and its scaling
    # settings. Both properties are required.
    ContainerInSerializerV3:
      type: object
      title: ContainerInSerializerV3
      required:
        - region_id
        - scale
      properties:
        region_id:
          type: integer
          title: Region Id
          description: Region id for the container
          example: 1337
          examples: [1337]
        scale:
          $ref: '#/components/schemas/ContainerScaleSerializerV3'
          description: Scale for the container
          examples:
            - max: 3
              min: 1
    # Scaling settings of a container. Only `min` and `max` are required; the
    # remaining properties declare defaults (cooldown 60, polling 30, CPU and
    # memory triggers at threshold 80).
    ContainerScaleSerializerV3:
      type: object
      title: ContainerScaleSerializerV3
      required:
        - min
        - max
      properties:
        cooldown_period:
          title: Cooldown Period
          description: Cooldown period between scaling actions in seconds
          # Integer within 1..3600, or an explicit null.
          anyOf:
            - type: integer
              minimum: 1
              maximum: 3600
            - type: 'null'
          default: 60
          examples: [60]
        max:
          type: integer
          title: Max
          description: Maximum scale for the container
          maximum: 300
          example: 3
          examples: [3]
        min:
          type: integer
          title: Min
          description: Minimum scale for the container
          minimum: 0
          example: 1
          examples: [1]
        polling_interval:
          title: Polling Interval
          description: Polling interval for scaling triggers in seconds
          # Integer within 5..3600, or an explicit null.
          anyOf:
            - type: integer
              minimum: 5
              maximum: 3600
            - type: 'null'
          default: 30
          examples: [30]
        triggers:
          $ref: '#/components/schemas/ContainerScaleTriggersSerializer'
          description: Triggers for scaling actions
          default:
            cpu:
              threshold: 80
            memory:
              threshold: 80
            gpu_utilization: null
            gpu_memory: null
            http: null
            sqs: null
          examples:
            - cpu:
                threshold: 75
    # Set of scaling triggers. Every trigger is nullable and defaults to null
    # in this schema; none is required.
    ContainerScaleTriggersSerializer:
      type: object
      title: ContainerScaleTriggersSerializer
      properties:
        cpu:
          description: CPU trigger configuration
          anyOf:
            - $ref: '#/components/schemas/ContainerScaleTriggersThresholdSerializer'
            - type: 'null'
          default: null
          examples:
            - threshold: 80
        gpu_memory:
          description: >-
            GPU memory trigger configuration. Calculated by
            `DCGM_FI_DEV_MEM_COPY_UTIL` metric
          anyOf:
            - $ref: '#/components/schemas/ContainerScaleTriggersThresholdSerializer'
            - type: 'null'
          default: null
          examples:
            - threshold: 80
        gpu_utilization:
          description: >-
            GPU utilization trigger configuration. Calculated by
            `DCGM_FI_DEV_GPU_UTIL` metric
          anyOf:
            - $ref: '#/components/schemas/ContainerScaleTriggersThresholdSerializer'
            - type: 'null'
          default: null
          examples:
            - threshold: 80
        http:
          description: HTTP trigger configuration
          anyOf:
            - $ref: '#/components/schemas/ContainerScaleTriggersRateSerializer'
            - type: 'null'
          default: null
          examples:
            - rate: 1
              window: 60
        memory:
          description: Memory trigger configuration
          anyOf:
            - $ref: '#/components/schemas/ContainerScaleTriggersThresholdSerializer'
            - type: 'null'
          default: null
          examples:
            - threshold: 80
        sqs:
          description: SQS trigger configuration
          anyOf:
            - $ref: '#/components/schemas/ContainerScaleTriggersSqsSerializer'
            - type: 'null'
          default: null
    # Percentage threshold (integer 1..100) used by the cpu, memory,
    # gpu_utilization and gpu_memory triggers.
    ContainerScaleTriggersThresholdSerializer:
      type: object
      title: ContainerScaleTriggersThresholdSerializer
      required:
        - threshold
      properties:
        threshold:
          type: integer
          title: Threshold
          description: Threshold value for the trigger in percentage
          minimum: 1
          maximum: 100
          example: 75
          examples: [75]
    # HTTP trigger settings: `rate` requests (1..1000) per `window` seconds
    # (1..3600). Both properties are required.
    ContainerScaleTriggersRateSerializer:
      type: object
      title: ContainerScaleTriggersRateSerializer
      required:
        - rate
        - window
      properties:
        rate:
          type: integer
          title: Rate
          description: Request count per 'window' seconds for the http trigger
          minimum: 1
          maximum: 1000
          example: 1
          examples: [1]
        window:
          type: integer
          title: Window
          description: Time window for rate calculation in seconds
          minimum: 1
          maximum: 3600
          example: 60
          examples: [60]
    # SQS trigger settings. Required: queue_url, queue_length,
    # activation_queue_length, aws_region and secret_name. The endpoint
    # override and the two scale_on_* flags are optional.
    ContainerScaleTriggersSqsSerializer:
      type: object
      title: ContainerScaleTriggersSqsSerializer
      required:
        - queue_url
        - queue_length
        - activation_queue_length
        - aws_region
        - secret_name
      properties:
        activation_queue_length:
          type: integer
          title: Activation Queue Length
          description: Number of messages for activation
          minimum: 1
        aws_endpoint:
          title: Aws Endpoint
          description: Custom AWS endpoint
          anyOf:
            - type: string
            - type: 'null'
          default: null
        aws_region:
          type: string
          title: Aws Region
          description: AWS region
          minLength: 1
          example: us-east-1
          examples:
            - us-east-1
        queue_length:
          type: integer
          title: Queue Length
          description: Number of messages for one replica
          minimum: 1
          example: 10
          examples: [10]
        queue_url:
          type: string
          title: Queue Url
          description: SQS queue URL
          minLength: 1
          example: https://sqs.us-east-1.amazonaws.com/123456789012/MyQueue
          examples:
            - https://sqs.us-east-1.amazonaws.com/123456789012/MyQueue
        scale_on_delayed:
          type: boolean
          title: Scale On Delayed
          description: Scale on delayed messages
          default: false
        scale_on_flight:
          type: boolean
          title: Scale On Flight
          description: Scale on in-flight messages
          default: false
        secret_name:
          type: string
          title: Secret Name
          description: Auth secret name
          minLength: 1
  # Authentication schemes; `APIKey` is the one referenced by the top-level
  # `security` requirement. The key is sent in the `Authorization` header.
  securitySchemes:
    APIKey:
      type: apiKey
      in: header
      name: Authorization
      description: >-
        API key for authentication. Make sure to include the word `apikey`,
        followed by a single space and then your token.

        Example: `apikey 1234$abcdef`

````