> ## Documentation Index
> Fetch the complete documentation index at: https://gcore.com/docs/llms.txt
> Use this file to discover all available pages before exploring further.

# Create inference deployment



## OpenAPI

````yaml /api-reference/services_documented/cloud_api.yaml post /cloud/v3/inference/{project_id}/deployments
openapi: 3.1.0
info:
  title: Gcore OpenAPI – Cloud API
  description: >-
    This document is an aggregated OpenAPI specification that unifies all Gcore
    products into a single file. It covers Cloud, CDN, DNS, WAAP, DDoS
    Protection, Object Storage, Streaming, and FastEdge services.
  version: '2026-05-15T06:37:28.230198+00:00'
servers:
  - url: https://api.gcore.com
security:
  - APIKey: []
tags:
  - name: Bare Metal
    x-displayName: Bare Metal
  - name: Container as a Service
    x-displayName: Container as a Service
  - name: Cost Reports
    x-displayName: Cost Reports
  - name: DDoS Protection
    x-displayName: DDoS Protection
  - name: Everywhere Inference
    x-displayName: Everywhere Inference
  - name: Everywhere Inference Apps
    x-displayName: Everywhere Inference Apps
  - name: File Shares
    x-displayName: File Shares
  - name: Floating IPs
    x-displayName: Floating IPs
  - name: Function as a Service
    x-displayName: Function as a Service
  - name: GPU Bare Metal
    x-displayName: GPU Bare Metal
  - name: GPU Virtual
    x-displayName: GPU Virtual
  - name: IP Ranges
    x-displayName: IP Ranges
  - name: Images
    x-displayName: Images
  - name: Instances
    x-displayName: Instances
  - name: Load Balancers
    x-displayName: Load Balancers
  - name: Logging
    x-displayName: Logging
  - name: Managed Kubernetes
    x-displayName: Managed Kubernetes
  - name: Managed PostgreSQL
    x-displayName: Managed PostgreSQL
  - name: Networks
    x-displayName: Networks
  - name: Placement Groups
    x-displayName: Placement Groups
  - name: Projects
    x-displayName: Projects
  - name: Quotas
    x-displayName: Quotas
  - name: Regions
    x-displayName: Regions
  - name: Registry
    x-displayName: Registry
  - name: Reservations
    x-displayName: Reservations
  - name: Reserved IPs
    x-displayName: Reserved IPs
  - name: Routers
    x-displayName: Routers
  - name: SSH Keys
    x-displayName: SSH Keys
  - name: Secrets
    x-displayName: Secrets
  - name: Security Groups
    x-displayName: Security Groups
  - name: Snapshot Schedules
    x-displayName: Snapshot Schedules
  - name: Snapshots
    x-displayName: Snapshots
  - name: Tasks
    x-displayName: Tasks
  - name: User Actions
    x-displayName: User Actions
  - name: User Role Assignments
    x-displayName: User Role Assignments
  - name: Volumes
    x-displayName: Volumes
paths:
  /cloud/v3/inference/{project_id}/deployments:
    post:
      tags:
        - Everywhere Inference
      summary: Create inference deployment
      operationId: InferenceInstancesHandlerV3.post
      parameters:
        - in: path
          name: project_id
          required: true
          description: Project ID
          schema:
            description: Project ID
            example: 1
            examples:
              - 1
            title: Project Id
            type: integer
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/InferenceInstanceInSerializerV3'
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/TaskIDsSerializer'
      x-codeSamples:
        - lang: Python
          source: |-
            import os
            from gcore import Gcore

            client = Gcore(
                api_key=os.environ.get("GCORE_API_KEY"),  # This is the default and can be omitted
            )
            task_id_list = client.cloud.inference.deployments.create(
                project_id=1,
                containers=[{
                    "region_id": 1,
                    "scale": {
                        "max": 3,
                        "min": 1,
                    },
                }],
                flavor_name="inference-16vcpu-232gib-1xh100-80gb",
                image="nginx:latest",
                listening_port=80,
                name="my-instance",
            )
            print(task_id_list.tasks)
        - lang: Go
          source: "package main\n\nimport (\n\t\"context\"\n\t\"fmt\"\n\n\t\"github.com/G-Core/gcore-go\"\n\t\"github.com/G-Core/gcore-go/cloud\"\n\t\"github.com/G-Core/gcore-go/option\"\n)\n\nfunc main() {\n\tclient := gcore.NewClient(\n\t\toption.WithAPIKey(\"My API Key\"),\n\t)\n\ttaskIDList, err := client.Cloud.Inference.Deployments.New(context.TODO(), cloud.InferenceDeploymentNewParams{\n\t\tProjectID: gcore.Int(1),\n\t\tContainers: []cloud.InferenceDeploymentNewParamsContainer{{\n\t\t\tRegionID: 1,\n\t\t\tScale: cloud.InferenceDeploymentNewParamsContainerScale{\n\t\t\t\tMax: 3,\n\t\t\t\tMin: 1,\n\t\t\t},\n\t\t}},\n\t\tFlavorName:    \"inference-16vcpu-232gib-1xh100-80gb\",\n\t\tImage:         \"nginx:latest\",\n\t\tListeningPort: 80,\n\t\tName:          \"my-instance\",\n\t})\n\tif err != nil {\n\t\tpanic(err.Error())\n\t}\n\tfmt.Printf(\"%+v\\n\", taskIDList.Tasks)\n}\n"
components:
  schemas:
    InferenceInstanceInSerializerV3:
      properties:
        api_keys:
          description: >-
            List of API keys for the inference instance. Multiple keys can be
            attached to one deployment. If `auth_enabled` and `api_keys` are
            both specified, a ValidationError will be raised.
          example:
            - key1
            - key2
          examples:
            - - key1
              - key2
          items:
            type: string
          title: Api Keys
          type: array
        auth_enabled:
          default: false
          deprecated: true
          description: >-
            Set to `true` to enable API key authentication for the inference
            instance. `"Authorization": "Bearer *****"` or `"X-Api-Key":
            "*****"` header is required for the requests to the instance if
            enabled. This field is deprecated and will be removed in the future.
            Use the `api_keys` field instead. If `auth_enabled` and `api_keys`
            are both specified, a ValidationError will be raised.
          example: false
          examples:
            - false
          title: Auth Enabled
          type: boolean
        command:
          anyOf:
            - items:
                type: string
              type: array
            - type: 'null'
          default: null
          description: Command to be executed when running a container from an image.
          examples:
            - - nginx
              - '-g'
              - daemon off;
          title: Command
        containers:
          description: List of containers for the inference instance.
          example:
            - region_id: 1
              scale:
                cooldown_period: 60
                max: 3
                min: 1
                triggers:
                  cpu:
                    threshold: 80
                  memory:
                    threshold: 70
          examples:
            - - region_id: 1
                scale:
                  cooldown_period: 60
                  max: 3
                  min: 1
                  triggers:
                    cpu:
                      threshold: 80
                    memory:
                      threshold: 70
          items:
            $ref: '#/components/schemas/ContainerInSerializerV3'
          minItems: 1
          title: Containers
          type: array
        credentials_name:
          anyOf:
            - type: string
            - type: 'null'
          default: ''
          description: Registry credentials name
          examples:
            - dockerhub
          title: Credentials Name
        description:
          anyOf:
            - type: string
            - type: 'null'
          default: ''
          description: Inference instance description.
          examples:
            - My first instance
          title: Description
        envs:
          additionalProperties:
            type: string
          default: {}
          description: Environment variables for the inference instance.
          example:
            DEBUG_MODE: 'False'
            KEY: '12345'
          examples:
            - DEBUG_MODE: 'False'
              KEY: '12345'
          title: Envs
          type: object
        flavor_name:
          description: Flavor name for the inference instance.
          example: inference-16vcpu-232gib-1xh100-80gb
          examples:
            - inference-16vcpu-232gib-1xh100-80gb
          minLength: 1
          title: Flavor Name
          type: string
        image:
          description: >-
            Docker image for the inference instance. This field should contain
            the image name and tag in the format 'name:tag', e.g.,
            'nginx:latest'. It defaults to Docker Hub as the image registry, but
            any accessible Docker image URL can be specified.
          example: nginx:latest
          examples:
            - nginx:latest
          pattern: >-
            ^(?:(?:[a-z0-9]+(?:[._-][a-z0-9]+)*/)*[a-z0-9]+(?:[._-][a-z0-9]+)*)(?::[A-Za-z0-9_][A-Za-z0-9_.-]{0,127})?$
          title: Image
          type: string
        ingress_opts:
          anyOf:
            - $ref: '#/components/schemas/IngressOptsSerializer'
            - type: 'null'
          default: null
          description: Ingress options for the inference instance
          examples:
            - disable_response_buffering: true
        listening_port:
          description: Listening port for the inference instance.
          example: 80
          examples:
            - 80
          maximum: 65535
          minimum: 1
          title: Listening Port
          type: integer
        logging:
          anyOf:
            - $ref: '#/components/schemas/LoggingInSerializer'
            - type: 'null'
          default: null
          description: Logging configuration for the inference instance
          examples:
            - destination_region_id: 1
              enabled: true
              retention_policy:
                period: 42
              topic_name: my-log-name
            - enabled: false
        name:
          description: Inference instance name.
          example: my-instance
          examples:
            - my-instance
          maxLength: 30
          minLength: 4
          pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$
          title: Name
          type: string
        probes:
          anyOf:
            - $ref: '#/components/schemas/InferenceInstanceProbesSerializerV2'
            - type: 'null'
          default: null
          description: >-
            Probes configured for all containers of the inference instance. If
            probes are not provided, and the `image` is from the Model Catalog
            registry, the default probes will be used.
        timeout:
          anyOf:
            - minimum: 0
              type: integer
            - type: 'null'
          default: 120
          description: >-
            Specifies the duration in seconds without any requests after which
            the containers will be downscaled to their minimum scale value as
            defined by `scale.min`. If set, this helps in optimizing resource
            usage by reducing the number of container instances during periods
            of inactivity. The default value when the parameter is not set is
            120.
          examples:
            - 120
          title: Timeout
      required:
        - name
        - image
        - listening_port
        - containers
        - flavor_name
      title: InferenceInstanceInSerializerV3
      type: object
    TaskIDsSerializer:
      properties:
        tasks:
          description: >-
            List of task IDs representing asynchronous operations. Use these IDs
            to monitor operation progress:

            - `GET /v1/tasks/{task_id}` - Check individual task status and
            details

            Poll task status until completion (`FINISHED`/`ERROR`) before
            proceeding with dependent operations.
          example:
            - d478ae29-dedc-4869-82f0-96104425f565
          examples:
            - - d478ae29-dedc-4869-82f0-96104425f565
          items:
            type: string
          title: Tasks
          type: array
      required:
        - tasks
      title: TaskIDsSerializer
      type: object
    ContainerInSerializerV3:
      properties:
        region_id:
          description: Region ID for the container
          example: 1337
          examples:
            - 1337
          title: Region Id
          type: integer
        scale:
          $ref: '#/components/schemas/ContainerScaleSerializerV3'
          description: Scale for the container
          examples:
            - max: 3
              min: 1
      required:
        - region_id
        - scale
      title: ContainerInSerializerV3
      type: object
    IngressOptsSerializer:
      properties:
        disable_response_buffering:
          default: false
          description: >-
            Disable response buffering if true. A client usually has a much
            slower connection and cannot consume the response data as fast as
            it is produced by an upstream application. Ingress tries to buffer
            the whole response in order to release the upstream application as
            soon as possible. By default, response buffering is enabled.
          example: true
          examples:
            - true
            - false
          title: Disable Response Buffering
          type: boolean
      title: IngressOptsSerializer
      type: object
    LoggingInSerializer:
      properties:
        destination_region_id:
          anyOf:
            - type: integer
            - type: 'null'
          default: null
          description: ID of the region in which the logs will be stored
          examples:
            - 1
          title: Destination Region Id
        enabled:
          default: false
          description: Enable or disable log streaming
          example: true
          examples:
            - true
            - false
          title: Enabled
          type: boolean
        retention_policy:
          anyOf:
            - $ref: '#/components/schemas/LaasIndexRetentionPolicyPydanticSerializer'
            - type: 'null'
          default: null
          description: Logs retention policy
          examples:
            - period: 45
        topic_name:
          anyOf:
            - maxLength: 223
              minLength: 1
              pattern: >-
                ^[a-z0-9](?:[-a-z0-9]*[a-z0-9])?(?:\.[a-z0-9](?:[-a-z0-9]*[a-z0-9])*)*$
              type: string
            - type: 'null'
          default: null
          description: The topic name to stream logs to
          examples:
            - my-log-name
          title: Topic Name
      title: LoggingInSerializer
      type: object
    InferenceInstanceProbesSerializerV2:
      additionalProperties: false
      properties:
        liveness_probe:
          anyOf:
            - $ref: >-
                #/components/schemas/InferenceInstanceContainerProbeConfigurationSerializerV2
            - type: 'null'
          description: Liveness probe configuration
        readiness_probe:
          anyOf:
            - $ref: >-
                #/components/schemas/InferenceInstanceContainerProbeConfigurationSerializerV2
            - type: 'null'
          description: Readiness probe configuration
        startup_probe:
          anyOf:
            - $ref: >-
                #/components/schemas/InferenceInstanceContainerProbeConfigurationSerializerV2
            - type: 'null'
          description: Startup probe configuration
      title: InferenceInstanceProbesSerializerV2
      type: object
    ContainerScaleSerializerV3:
      properties:
        cooldown_period:
          anyOf:
            - maximum: 3600
              minimum: 1
              type: integer
            - type: 'null'
          default: 60
          description: Cooldown period between scaling actions in seconds
          examples:
            - 60
          title: Cooldown Period
        max:
          description: Maximum scale for the container
          example: 3
          examples:
            - 3
          maximum: 300
          title: Max
          type: integer
        min:
          description: Minimum scale for the container
          example: 1
          examples:
            - 1
          minimum: 0
          title: Min
          type: integer
        polling_interval:
          anyOf:
            - maximum: 3600
              minimum: 5
              type: integer
            - type: 'null'
          default: 30
          description: Polling interval for scaling triggers in seconds
          examples:
            - 30
          title: Polling Interval
        triggers:
          $ref: '#/components/schemas/ContainerScaleTriggersSerializer'
          default:
            cpu:
              threshold: 80
            memory:
              threshold: 80
            gpu_utilization: null
            gpu_memory: null
            http: null
            sqs: null
          description: Triggers for scaling actions
          examples:
            - cpu:
                threshold: 75
      required:
        - min
        - max
      title: ContainerScaleSerializerV3
      type: object
    LaasIndexRetentionPolicyPydanticSerializer:
      properties:
        period:
          anyOf:
            - exclusiveMinimum: 0
              maximum: 1825
              type: integer
            - type: 'null'
          description: Number of days for which logs must be kept.
          examples:
            - 45
          title: Period
      required:
        - period
      title: LaasIndexRetentionPolicyPydanticSerializer
      type: object
    InferenceInstanceContainerProbeConfigurationSerializerV2:
      properties:
        enabled:
          description: Whether the probe is enabled or not.
          example: true
          examples:
            - true
            - false
          title: Enabled
          type: boolean
        probe:
          $ref: '#/components/schemas/ContainerProbeSerializerV2'
          description: Probe configuration (`exec`, `http_get` or `tcp_socket`)
      required:
        - enabled
      title: InferenceInstanceContainerProbeConfigurationSerializerV2
      type: object
    ContainerScaleTriggersSerializer:
      properties:
        cpu:
          anyOf:
            - $ref: '#/components/schemas/ContainerScaleTriggersThresholdSerializer'
            - type: 'null'
          default: null
          description: CPU trigger configuration
          examples:
            - threshold: 80
        gpu_memory:
          anyOf:
            - $ref: '#/components/schemas/ContainerScaleTriggersThresholdSerializer'
            - type: 'null'
          default: null
          description: >-
            GPU memory trigger configuration. Calculated by
            `DCGM_FI_DEV_MEM_COPY_UTIL` metric
          examples:
            - threshold: 80
        gpu_utilization:
          anyOf:
            - $ref: '#/components/schemas/ContainerScaleTriggersThresholdSerializer'
            - type: 'null'
          default: null
          description: >-
            GPU utilization trigger configuration. Calculated by
            `DCGM_FI_DEV_GPU_UTIL` metric
          examples:
            - threshold: 80
        http:
          anyOf:
            - $ref: '#/components/schemas/ContainerScaleTriggersRateSerializer'
            - type: 'null'
          default: null
          description: HTTP trigger configuration
          examples:
            - rate: 1
              window: 60
        memory:
          anyOf:
            - $ref: '#/components/schemas/ContainerScaleTriggersThresholdSerializer'
            - type: 'null'
          default: null
          description: Memory trigger configuration
          examples:
            - threshold: 80
        sqs:
          anyOf:
            - $ref: '#/components/schemas/ContainerScaleTriggersSqsSerializer'
            - type: 'null'
          default: null
          description: SQS trigger configuration
      title: ContainerScaleTriggersSerializer
      type: object
    ContainerProbeSerializerV2:
      properties:
        exec:
          anyOf:
            - $ref: '#/components/schemas/ContainerProbeExecConfigSerializerV2'
            - type: 'null'
          default: null
          description: Exec probe configuration
        failure_threshold:
          default: 3
          description: >-
            The number of consecutive probe failures that mark the container as
            unhealthy.
          example: 3
          examples:
            - 3
          minimum: 1
          title: Failure Threshold
          type: integer
        http_get:
          anyOf:
            - $ref: '#/components/schemas/ContainerProbeHttpGetConfigSerializerV2'
            - type: 'null'
          default: null
          description: HTTP GET probe configuration
        initial_delay_seconds:
          default: 0
          description: The initial delay before starting the first probe.
          example: 0
          examples:
            - 0
            - 10
          minimum: 0
          title: Initial Delay Seconds
          type: integer
        period_seconds:
          default: 10
          description: How often (in seconds) to perform the probe.
          example: 5
          examples:
            - 5
            - 10
          minimum: 1
          title: Period Seconds
          type: integer
        success_threshold:
          default: 1
          description: >-
            The number of consecutive successful probes that mark the container
            as healthy.
          example: 1
          examples:
            - 1
          minimum: 1
          title: Success Threshold
          type: integer
        tcp_socket:
          anyOf:
            - $ref: '#/components/schemas/ContainerProbeTcpSocketConfigSerializerV2'
            - type: 'null'
          default: null
          description: TCP socket probe configuration
        timeout_seconds:
          default: 3
          description: The timeout for each probe.
          example: 1
          examples:
            - 1
            - 5
          minimum: 1
          title: Timeout Seconds
          type: integer
      title: ContainerProbeSerializerV2
      type: object
    ContainerScaleTriggersThresholdSerializer:
      properties:
        threshold:
          description: Threshold value for the trigger in percentage
          example: 75
          examples:
            - 75
          maximum: 100
          minimum: 1
          title: Threshold
          type: integer
      required:
        - threshold
      title: ContainerScaleTriggersThresholdSerializer
      type: object
    ContainerScaleTriggersRateSerializer:
      properties:
        rate:
          description: Request count per 'window' seconds for the http trigger
          example: 1
          examples:
            - 1
          maximum: 1000
          minimum: 1
          title: Rate
          type: integer
        window:
          description: Time window for rate calculation in seconds
          example: 60
          examples:
            - 60
          maximum: 3600
          minimum: 1
          title: Window
          type: integer
      required:
        - rate
        - window
      title: ContainerScaleTriggersRateSerializer
      type: object
    ContainerScaleTriggersSqsSerializer:
      properties:
        activation_queue_length:
          description: Number of messages for activation
          minimum: 1
          title: Activation Queue Length
          type: integer
        aws_endpoint:
          anyOf:
            - type: string
            - type: 'null'
          default: null
          description: Custom AWS endpoint
          title: Aws Endpoint
        aws_region:
          description: AWS region
          example: us-east-1
          examples:
            - us-east-1
          minLength: 1
          title: Aws Region
          type: string
        queue_length:
          description: Number of messages for one replica
          example: 10
          examples:
            - 10
          minimum: 1
          title: Queue Length
          type: integer
        queue_url:
          description: SQS queue URL
          example: https://sqs.us-east-1.amazonaws.com/123456789012/MyQueue
          examples:
            - https://sqs.us-east-1.amazonaws.com/123456789012/MyQueue
          minLength: 1
          title: Queue Url
          type: string
        scale_on_delayed:
          default: false
          description: Scale on delayed messages
          title: Scale On Delayed
          type: boolean
        scale_on_flight:
          default: false
          description: Scale on in-flight messages
          title: Scale On Flight
          type: boolean
        secret_name:
          description: Auth secret name
          minLength: 1
          title: Secret Name
          type: string
      required:
        - queue_url
        - queue_length
        - activation_queue_length
        - aws_region
        - secret_name
      title: ContainerScaleTriggersSqsSerializer
      type: object
    ContainerProbeExecConfigSerializerV2:
      properties:
        command:
          description: Command to be executed inside the running container.
          example:
            - ls
            - '-l'
          examples:
            - - ls
              - '-l'
          items:
            type: string
          title: Command
          type: array
      required:
        - command
      title: ContainerProbeExecConfigSerializerV2
      type: object
    ContainerProbeHttpGetConfigSerializerV2:
      properties:
        headers:
          additionalProperties:
            type: string
          description: HTTP headers to be sent with the request.
          example:
            Authorization: Bearer token 123
          examples:
            - Authorization: Bearer token 123
          title: Headers
          type: object
        host:
          anyOf:
            - type: string
            - type: 'null'
          default: null
          description: Host name to send HTTP request to.
          examples:
            - 127.0.0.1
          title: Host
        path:
          default: /
          description: The endpoint to send the HTTP request to.
          example: /healthz
          examples:
            - /healthz
            - /readiness
          title: Path
          type: string
        port:
          description: Port number the probe should connect to.
          example: 80
          examples:
            - 80
            - 8080
          maximum: 65535
          minimum: 1
          title: Port
          type: integer
        schema:
          default: HTTP
          description: Schema to use for the HTTP request.
          example: HTTP
          examples:
            - HTTP
            - HTTPS
          pattern: ^(HTTP|HTTPS)$
          title: Schema
          type: string
      required:
        - port
      title: ContainerProbeHttpGetConfigSerializerV2
      type: object
    ContainerProbeTcpSocketConfigSerializerV2:
      properties:
        port:
          description: Port number to check if it's open.
          example: 80
          examples:
            - 80
            - 8080
          maximum: 65535
          minimum: 1
          title: Port
          type: integer
      required:
        - port
      title: ContainerProbeTcpSocketConfigSerializerV2
      type: object
  securitySchemes:
    APIKey:
      description: >-
        API key for authentication. Make sure to include the word `apikey`,
        followed by a single space and then your token.

        Example: `apikey 1234$abcdef`
      type: apiKey
      in: header
      name: Authorization

````