> ## Documentation Index
> Fetch the complete documentation index at: https://gcore.com/docs/llms.txt
> Use this file to discover all available pages before exploring further.

# Update inference deployment



## OpenAPI

````yaml /api-reference/services_documented/cloud_api.yaml patch /cloud/v3/inference/{project_id}/deployments/{deployment_name}
# OpenAPI 3.1 document header for the aggregated Gcore specification.
openapi: 3.1.0
info:
  title: Gcore OpenAPI – Cloud API
  description: >-
    This OpenAPI is an aggregated OpenAPI specification that unifies all Gcore
    products into a single file. It covers Cloud, CDN, DNS, WAAP, DDoS
    Protection, Object Storage, Streaming, and FastEdge services.
  # Quoted so the timestamp stays a string rather than being typed as a date.
  version: '2026-05-15T06:37:28.230198+00:00'
servers:
  - url: https://api.gcore.com
# Document-level security requirement: refers to the APIKey scheme declared
# under components.securitySchemes.
security:
  - APIKey: []
# Product-area tags, listed alphabetically. Every entry's x-displayName
# repeats its name. The operation in this page uses "Everywhere Inference".
tags:
  - name: Bare Metal
    x-displayName: Bare Metal
  - name: Container as a Service
    x-displayName: Container as a Service
  - name: Cost Reports
    x-displayName: Cost Reports
  - name: DDoS Protection
    x-displayName: DDoS Protection
  - name: Everywhere Inference
    x-displayName: Everywhere Inference
  - name: Everywhere Inference Apps
    x-displayName: Everywhere Inference Apps
  - name: File Shares
    x-displayName: File Shares
  - name: Floating IPs
    x-displayName: Floating IPs
  - name: Function as a Service
    x-displayName: Function as a Service
  - name: GPU Bare Metal
    x-displayName: GPU Bare Metal
  - name: GPU Virtual
    x-displayName: GPU Virtual
  - name: IP Ranges
    x-displayName: IP Ranges
  - name: Images
    x-displayName: Images
  - name: Instances
    x-displayName: Instances
  - name: Load Balancers
    x-displayName: Load Balancers
  - name: Logging
    x-displayName: Logging
  - name: Managed Kubernetes
    x-displayName: Managed Kubernetes
  - name: Managed PostgreSQL
    x-displayName: Managed PostgreSQL
  - name: Networks
    x-displayName: Networks
  - name: Placement Groups
    x-displayName: Placement Groups
  - name: Projects
    x-displayName: Projects
  - name: Quotas
    x-displayName: Quotas
  - name: Regions
    x-displayName: Regions
  - name: Registry
    x-displayName: Registry
  - name: Reservations
    x-displayName: Reservations
  - name: Reserved IPs
    x-displayName: Reserved IPs
  - name: Routers
    x-displayName: Routers
  - name: SSH Keys
    x-displayName: SSH Keys
  - name: Secrets
    x-displayName: Secrets
  - name: Security Groups
    x-displayName: Security Groups
  - name: Snapshot Schedules
    x-displayName: Snapshot Schedules
  - name: Snapshots
    x-displayName: Snapshots
  - name: Tasks
    x-displayName: Tasks
  - name: User Actions
    x-displayName: User Actions
  - name: User Role Assignments
    x-displayName: User Role Assignments
  - name: Volumes
    x-displayName: Volumes
# Single documented operation: PATCH on one inference deployment, addressed
# by project ID and deployment name.
paths:
  /cloud/v3/inference/{project_id}/deployments/{deployment_name}:
    patch:
      tags:
        - Everywhere Inference
      summary: Update inference deployment
      operationId: InferenceInstanceHandlerV3.patch
      # Both path parameters are required; project_id is an integer and
      # deployment_name is a string.
      parameters:
        - in: path
          name: project_id
          required: true
          description: Project ID
          schema:
            description: Project ID
            example: 1
            examples:
              - 1
            title: Project Id
            type: integer
        - in: path
          name: deployment_name
          required: true
          description: Inference instance name.
          schema:
            description: Inference instance name.
            example: my-instance
            examples:
              - my-instance
            title: Deployment Name
            type: string
      # JSON body described by InferenceInstanceInUpdateSerializerV3.
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/InferenceInstanceInUpdateSerializerV3'
      # The only documented response is 200 with a TaskIDsSerializer payload.
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/TaskIDsSerializer'
      # SDK usage samples for this operation. Both the Python and the Go
      # sample update deployment "my-instance" in project 1 and print the
      # returned task list. The Go sample is stored as a double-quoted scalar
      # with \n and \t escapes.
      x-codeSamples:
        - lang: Python
          source: |-
            import os
            from gcore import Gcore

            client = Gcore(
                api_key=os.environ.get("GCORE_API_KEY"),  # This is the default and can be omitted
            )
            task_id_list = client.cloud.inference.deployments.update(
                deployment_name="my-instance",
                project_id=1,
            )
            print(task_id_list.tasks)
        - lang: Go
          source: "package main\n\nimport (\n\t\"context\"\n\t\"fmt\"\n\n\t\"github.com/G-Core/gcore-go\"\n\t\"github.com/G-Core/gcore-go/cloud\"\n\t\"github.com/G-Core/gcore-go/option\"\n)\n\nfunc main() {\n\tclient := gcore.NewClient(\n\t\toption.WithAPIKey(\"My API Key\"),\n\t)\n\ttaskIDList, err := client.Cloud.Inference.Deployments.Update(\n\t\tcontext.TODO(),\n\t\t\"my-instance\",\n\t\tcloud.InferenceDeploymentUpdateParams{\n\t\t\tProjectID: gcore.Int(1),\n\t\t},\n\t)\n\tif err != nil {\n\t\tpanic(err.Error())\n\t}\n\tfmt.Printf(\"%+v\\n\", taskIDList.Tasks)\n}\n"
components:
  schemas:
    # Request body for the PATCH deployment operation. The schema has no
    # `required` list, so every property is optional.
    InferenceInstanceInUpdateSerializerV3:
      properties:
        # Mutually exclusive with the deprecated `auth_enabled` flag (see the
        # description text).
        api_keys:
          anyOf:
            - items:
                type: string
              type: array
            - type: 'null'
          description: >-
            List of API keys for the inference instance. Multiple keys can be
            attached to one deployment. If `auth_enabled` and `api_keys` are
            both specified, a ValidationError will be raised. If `[]` is
            provided, the API keys will be removed and auth will be disabled on
            the deployment.
          examples:
            - - key1
              - key2
          title: Api Keys
        auth_enabled:
          deprecated: true
          description: >-
            Set to `true` to enable API key authentication for the inference
            instance. `"Authorization": "Bearer *****"` or `"X-Api-Key":
            "*****"` header is required for the requests to the instance if
            enabled. This field is deprecated and will be removed in the future.
            Use `api_keys` field instead. If `auth_enabled` and `api_keys` are
            both specified, a ValidationError will be raised.
          example: false
          examples:
            - false
          title: Auth Enabled
          type: boolean
        command:
          anyOf:
            - items:
                type: string
              type: array
            - type: 'null'
          default: null
          description: Command to be executed when running a container from an image.
          examples:
            - - nginx
              - '-g'
              - daemon off;
          title: Command
        # When a list is given it must hold at least one container. The
        # example uses `region_id`, the key ContainerInUpdateSerializerV3
        # requires.
        containers:
          anyOf:
            - items:
                $ref: '#/components/schemas/ContainerInUpdateSerializerV3'
              minItems: 1
              type: array
            - type: 'null'
          default: null
          description: List of containers for the inference instance.
          examples:
            - - region_id: 1
                scale:
                  cooldown_period: 60
                  max: 3
                  min: 1
                  triggers:
                    cpu:
                      threshold: 80
                    memory:
                      threshold: 70
          title: Containers
        credentials_name:
          anyOf:
            - type: string
            - type: 'null'
          default: null
          description: Registry credentials name
          examples:
            - dockerhub
          title: Credentials Name
        description:
          anyOf:
            - type: string
            - type: 'null'
          default: null
          description: Inference instance description.
          examples:
            - My first instance
          title: Description
        # String-to-string map; the example values are quoted so they stay
        # strings.
        envs:
          anyOf:
            - additionalProperties:
                type: string
              type: object
            - type: 'null'
          default: null
          description: Environment variables for the inference instance.
          examples:
            - DEBUG_MODE: 'False'
              KEY: '12345'
          title: Envs
        flavor_name:
          description: Flavor name for the inference instance.
          example: inference-16vcpu-232gib-1xh100-80gb
          examples:
            - inference-16vcpu-232gib-1xh100-80gb
          title: Flavor Name
          type: string
        # Pattern: slash-separated lowercase name components, optionally
        # followed by `:tag` (tag length 1-128).
        image:
          anyOf:
            - pattern: >-
                ^(?:(?:[a-z0-9]+(?:[._-][a-z0-9]+)*/)*[a-z0-9]+(?:[._-][a-z0-9]+)*)(?::[A-Za-z0-9_][A-Za-z0-9_.-]{0,127})?$
              type: string
            - type: 'null'
          default: null
          description: >-
            Docker image for the inference instance. This field should contain
            the image name and tag in the format 'name:tag', e.g.,
            'nginx:latest'. It defaults to Docker Hub as the image registry, but
            any accessible Docker image URL can be specified.
          examples:
            - nginx:latest
          title: Image
        ingress_opts:
          anyOf:
            - $ref: '#/components/schemas/IngressOptsSerializer'
            - type: 'null'
          default: null
          description: Ingress options for the inference instance
          examples:
            - disable_response_buffering: true
        listening_port:
          anyOf:
            - maximum: 65535
              minimum: 1
              type: integer
            - type: 'null'
          default: null
          description: Listening port for the inference instance.
          examples:
            - 80
          title: Listening Port
        logging:
          anyOf:
            - $ref: '#/components/schemas/LoggingInSerializer'
            - type: 'null'
          default: null
          description: Logging configuration for the inference instance
          examples:
            - destination_region_id: 1
              enabled: true
              retention_policy:
                period: 42
              topic_name: my-log-name
            - enabled: false
        probes:
          anyOf:
            - $ref: '#/components/schemas/PatchInferenceInstanceProbesSerializerV2'
            - type: 'null'
          description: Probes configured for all containers of the inference instance.
        timeout:
          anyOf:
            - minimum: 0
              type: integer
            - type: 'null'
          default: null
          description: >-
            Specifies the duration in seconds without any requests after which
            the containers will be downscaled to their minimum scale value as
            defined by `scale.min`. If set, this helps in optimizing resource
            usage by reducing the number of container instances during periods
            of inactivity. The default value when the parameter is not set is
            120.
          examples:
            - 120
          title: Timeout
      title: InferenceInstanceInUpdateSerializerV3
      type: object
    # Response body of the operation: an object whose required `tasks`
    # property is an array of task ID strings.
    TaskIDsSerializer:
      properties:
        tasks:
          description: >-
            List of task IDs representing asynchronous operations. Use these IDs
            to monitor operation progress:

            - `GET /v1/tasks/{task_id}` - Check individual task status and
            details

            Poll task status until completion (`FINISHED`/`ERROR`) before
            proceeding with dependent operations.
          example:
            - d478ae29-dedc-4869-82f0-96104425f565
          examples:
            - - d478ae29-dedc-4869-82f0-96104425f565
          items:
            type: string
          title: Tasks
          type: array
      required:
        - tasks
      title: TaskIDsSerializer
      type: object
    # One container entry of an update request. Both `region_id` and `scale`
    # are required.
    ContainerInUpdateSerializerV3:
      properties:
        region_id:
          description: Region id for the container
          # Unquoted so the example matches the declared integer type.
          example: 1337
          examples:
            - 1337
          title: Region Id
          type: integer
        scale:
          $ref: '#/components/schemas/ContainerScaleUpdateSerializerV3'
          description: Scale for the container
          examples:
            - max: 3
              min: 1
      required:
        - region_id
        - scale
      title: ContainerInUpdateSerializerV3
      type: object
    # Ingress options. The single flag is optional and defaults to false,
    # i.e. response buffering stays enabled unless it is set to true.
    IngressOptsSerializer:
      properties:
        disable_response_buffering:
          default: false
          description: >-
            Disable response buffering if true. A client usually has a much
            slower connection and can not consume the response data as fast as
            it is produced by an upstream application. Ingress tries to buffer
            the whole response in order to release the upstream application as
            soon as possible. By default, the response buffering is enabled.
          example: true
          examples:
            - true
            - false
          title: Disable Response Buffering
          type: boolean
      title: IngressOptsSerializer
      type: object
    # Logging configuration. No property is required; `enabled` defaults to
    # false and the other three default to null.
    LoggingInSerializer:
      properties:
        destination_region_id:
          anyOf:
            - type: integer
            - type: 'null'
          default: null
          description: ID of the region in which the logs will be stored
          examples:
            - 1
          title: Destination Region Id
        enabled:
          default: false
          description: Enable or disable log streaming
          example: true
          examples:
            - true
            - false
          title: Enabled
          type: boolean
        retention_policy:
          anyOf:
            - $ref: '#/components/schemas/LaasIndexRetentionPolicyPydanticSerializer'
            - type: 'null'
          default: null
          description: Logs retention policy
          examples:
            - period: 45
        # When a string, the topic name is 1-223 characters of lowercase
        # alphanumerics and hyphens in dot-separated labels (see pattern).
        topic_name:
          anyOf:
            - maxLength: 223
              minLength: 1
              pattern: >-
                ^[a-z0-9](?:[-a-z0-9]*[a-z0-9])?(?:\.[a-z0-9](?:[-a-z0-9]*[a-z0-9])*)*$
              type: string
            - type: 'null'
          default: null
          description: The topic name to stream logs to
          examples:
            - my-log-name
          title: Topic Name
      title: LoggingInSerializer
      type: object
    # Probe settings for a deployment update. `additionalProperties: false`
    # means only the three probe slots below are accepted; each slot is
    # optional and may be null.
    PatchInferenceInstanceProbesSerializerV2:
      additionalProperties: false
      properties:
        liveness_probe:
          anyOf:
            - $ref: >-
                #/components/schemas/PatchInferenceInstanceContainerProbeConfigurationInSerializerV2
            - type: 'null'
          description: Liveness probe configuration
        readiness_probe:
          anyOf:
            - $ref: >-
                #/components/schemas/PatchInferenceInstanceContainerProbeConfigurationInSerializerV2
            - type: 'null'
          description: Readiness probe configuration
        startup_probe:
          anyOf:
            - $ref: >-
                #/components/schemas/PatchInferenceInstanceContainerProbeConfigurationInSerializerV2
            - type: 'null'
          description: Startup probe configuration
      title: PatchInferenceInstanceProbesSerializerV2
      type: object
    # Autoscaling settings for one container. Only `min` and `max` are
    # required; cooldown_period, polling_interval and triggers are optional.
    ContainerScaleUpdateSerializerV3:
      properties:
        cooldown_period:
          description: Cooldown period between scaling actions in seconds
          example: 60
          examples:
            - 60
          maximum: 3600
          minimum: 1
          title: Cooldown Period
          type: integer
        # Upper bound only (300); this schema declares no minimum for `max`.
        max:
          description: Maximum scale for the container
          example: 3
          examples:
            - 3
          maximum: 300
          title: Max
          type: integer
        # Lower bound only (0); this schema declares no maximum for `min`.
        min:
          description: Minimum scale for the container
          example: 1
          examples:
            - 1
          minimum: 0
          title: Min
          type: integer
        polling_interval:
          description: Polling interval for scaling triggers in seconds
          example: 30
          examples:
            - 30
          maximum: 3600
          minimum: 5
          title: Polling Interval
          type: integer
        triggers:
          $ref: '#/components/schemas/ContainerScaleTriggersSerializer'
          description: Triggers for scaling actions
          examples:
            - cpu:
                threshold: 75
      required:
        - min
        - max
      title: ContainerScaleUpdateSerializerV3
      type: object
    # Log retention policy. `period` must be present but may be null; an
    # integer value must be greater than 0 and at most 1825.
    LaasIndexRetentionPolicyPydanticSerializer:
      properties:
        period:
          anyOf:
            - exclusiveMinimum: 0
              maximum: 1825
              type: integer
            - type: 'null'
          description: Duration of days for which logs must be kept.
          examples:
            - 45
          title: Period
      required:
        - period
      title: LaasIndexRetentionPolicyPydanticSerializer
      type: object
    # One probe slot (liveness, readiness or startup): an on/off flag plus the
    # probe definition. Neither property is required.
    PatchInferenceInstanceContainerProbeConfigurationInSerializerV2:
      properties:
        enabled:
          description: Whether the probe is enabled or not.
          example: true
          examples:
            - true
            - false
          title: Enabled
          type: boolean
        probe:
          $ref: '#/components/schemas/PatchContainerProbeInSerializerV2'
          description: Probe configuration (exec, `http_get` or `tcp_socket`)
      title: PatchInferenceInstanceContainerProbeConfigurationInSerializerV2
      type: object
    # Scaling triggers. Every trigger is optional and defaults to null.
    # cpu, memory, gpu_memory and gpu_utilization take a percentage threshold;
    # http takes a rate/window pair; sqs takes an SQS queue configuration.
    ContainerScaleTriggersSerializer:
      properties:
        cpu:
          anyOf:
            - $ref: '#/components/schemas/ContainerScaleTriggersThresholdSerializer'
            - type: 'null'
          default: null
          description: CPU trigger configuration
          examples:
            - threshold: 80
        gpu_memory:
          anyOf:
            - $ref: '#/components/schemas/ContainerScaleTriggersThresholdSerializer'
            - type: 'null'
          default: null
          description: >-
            GPU memory trigger configuration. Calculated by
            `DCGM_FI_DEV_MEM_COPY_UTIL` metric
          examples:
            - threshold: 80
        gpu_utilization:
          anyOf:
            - $ref: '#/components/schemas/ContainerScaleTriggersThresholdSerializer'
            - type: 'null'
          default: null
          description: >-
            GPU utilization trigger configuration. Calculated by
            `DCGM_FI_DEV_GPU_UTIL` metric
          examples:
            - threshold: 80
        http:
          anyOf:
            - $ref: '#/components/schemas/ContainerScaleTriggersRateSerializer'
            - type: 'null'
          default: null
          description: HTTP trigger configuration
          examples:
            - rate: 1
              window: 60
        memory:
          anyOf:
            - $ref: '#/components/schemas/ContainerScaleTriggersThresholdSerializer'
            - type: 'null'
          default: null
          description: Memory trigger configuration
          examples:
            - threshold: 80
        sqs:
          anyOf:
            - $ref: '#/components/schemas/ContainerScaleTriggersSqsSerializer'
            - type: 'null'
          default: null
          description: SQS trigger configuration
      title: ContainerScaleTriggersSerializer
      type: object
    # Probe definition for an update request. It combines three nullable
    # probe-type configs (exec, http_get, tcp_socket) with integer timing and
    # threshold settings. No property is required.
    PatchContainerProbeInSerializerV2:
      properties:
        exec:
          anyOf:
            - $ref: '#/components/schemas/PatchContainerProbeExecConfigSerializerV2'
            - type: 'null'
          description: Exec probe configuration
        failure_threshold:
          description: >-
            The number of consecutive probe failures that mark the container as
            unhealthy.
          example: 3
          examples:
            - 3
          minimum: 1
          title: Failure Threshold
          type: integer
        http_get:
          anyOf:
            - $ref: >-
                #/components/schemas/PatchContainerProbeHttpGetConfigSerializerV2
            - type: 'null'
          description: HTTP GET probe configuration
        # The only timing field that allows 0; the others have minimum 1.
        initial_delay_seconds:
          description: The initial delay before starting the first probe.
          example: 0
          examples:
            - 0
            - 10
          minimum: 0
          title: Initial Delay Seconds
          type: integer
        period_seconds:
          description: How often (in seconds) to perform the probe.
          example: 5
          examples:
            - 5
            - 10
          minimum: 1
          title: Period Seconds
          type: integer
        success_threshold:
          description: >-
            The number of consecutive successful probes that mark the container
            as healthy.
          example: 1
          examples:
            - 1
          minimum: 1
          title: Success Threshold
          type: integer
        tcp_socket:
          anyOf:
            - $ref: >-
                #/components/schemas/PatchContainerProbeTcpSocketConfigSerializerV2
            - type: 'null'
          description: TCP socket probe configuration
        timeout_seconds:
          description: The timeout for each probe.
          example: 1
          examples:
            - 1
            - 5
          minimum: 1
          title: Timeout Seconds
          type: integer
      title: PatchContainerProbeInSerializerV2
      type: object
    # Percentage-threshold trigger: a required integer `threshold` between 1
    # and 100 inclusive.
    ContainerScaleTriggersThresholdSerializer:
      properties:
        threshold:
          description: Threshold value for the trigger in percentage
          example: 75
          examples:
            - 75
          maximum: 100
          minimum: 1
          title: Threshold
          type: integer
      required:
        - threshold
      title: ContainerScaleTriggersThresholdSerializer
      type: object
    # HTTP request-rate trigger. Both fields are required: `rate` (1-1000
    # requests) is measured per `window` (1-3600 seconds).
    ContainerScaleTriggersRateSerializer:
      properties:
        rate:
          description: Request count per 'window' seconds for the http trigger
          example: 1
          examples:
            - 1
          maximum: 1000
          minimum: 1
          title: Rate
          type: integer
        window:
          description: Time window for rate calculation in seconds
          example: 60
          examples:
            - 60
          maximum: 3600
          minimum: 1
          title: Window
          type: integer
      required:
        - rate
        - window
      title: ContainerScaleTriggersRateSerializer
      type: object
    # SQS queue-length trigger. Required: queue_url, queue_length,
    # activation_queue_length, aws_region and secret_name. Optional:
    # aws_endpoint (default null) and the two scale_on_* flags (default false).
    ContainerScaleTriggersSqsSerializer:
      properties:
        activation_queue_length:
          description: Number of messages for activation
          minimum: 1
          title: Activation Queue Length
          type: integer
        aws_endpoint:
          anyOf:
            - type: string
            - type: 'null'
          default: null
          description: Custom AWS endpoint
          title: Aws Endpoint
        aws_region:
          description: AWS region
          example: us-east-1
          examples:
            - us-east-1
          minLength: 1
          title: Aws Region
          type: string
        queue_length:
          description: Number of messages for one replica
          example: 10
          examples:
            - 10
          minimum: 1
          title: Queue Length
          type: integer
        queue_url:
          description: SQS queue URL
          example: https://sqs.us-east-1.amazonaws.com/123456789012/MyQueue
          examples:
            - https://sqs.us-east-1.amazonaws.com/123456789012/MyQueue
          minLength: 1
          title: Queue Url
          type: string
        scale_on_delayed:
          default: false
          description: Scale on delayed messages
          title: Scale On Delayed
          type: boolean
        scale_on_flight:
          default: false
          description: Scale on in-flight messages
          title: Scale On Flight
          type: boolean
        secret_name:
          description: Auth secret name
          minLength: 1
          title: Secret Name
          type: string
      required:
        - queue_url
        - queue_length
        - activation_queue_length
        - aws_region
        - secret_name
      title: ContainerScaleTriggersSqsSerializer
      type: object
    # Exec probe: an optional `command` given as an array of strings.
    PatchContainerProbeExecConfigSerializerV2:
      properties:
        command:
          description: Command to be executed inside the running container.
          example:
            - ls
            - '-l'
          examples:
            - - ls
              - '-l'
          items:
            type: string
          title: Command
          type: array
      title: PatchContainerProbeExecConfigSerializerV2
      type: object
    # HTTP GET probe target: headers, host, path, port and schema. No
    # property is required.
    PatchContainerProbeHttpGetConfigSerializerV2:
      properties:
        headers:
          additionalProperties:
            type: string
          description: HTTP headers to be sent with the request.
          example:
            Authorization: Bearer token 123
          examples:
            - Authorization: Bearer token 123
          title: Headers
          type: object
        host:
          description: Host name to send HTTP request to.
          example: 127.0.0.1
          examples:
            - 127.0.0.1
          title: Host
          type: string
        path:
          description: The endpoint to send the HTTP request to.
          example: /healthz
          examples:
            - /healthz
            - /readiness
          title: Path
          type: string
        port:
          description: Port number the probe should connect to.
          example: 80
          examples:
            - 80
            - 8080
          maximum: 65535
          minimum: 1
          title: Port
          type: integer
        # Note: `schema` here is a data property restricted by its pattern to
        # HTTP or HTTPS; it is not a JSON Schema keyword.
        schema:
          description: Schema to use for the HTTP request.
          example: HTTP
          examples:
            - HTTP
            - HTTPS
          pattern: ^(HTTP|HTTPS)$
          title: Schema
          type: string
      title: PatchContainerProbeHttpGetConfigSerializerV2
      type: object
    # TCP socket probe: an optional integer `port` in the range 1-65535.
    PatchContainerProbeTcpSocketConfigSerializerV2:
      properties:
        port:
          description: Port number to check if it's open.
          example: 80
          examples:
            - 80
            - 8080
          maximum: 65535
          minimum: 1
          title: Port
          type: integer
      title: PatchContainerProbeTcpSocketConfigSerializerV2
      type: object
  # Authentication scheme referenced by the document-level `security` list:
  # an API key carried in the `Authorization` request header.
  securitySchemes:
    APIKey:
      description: >-
        API key for authentication. Make sure to include the word `apikey`,
        followed by a single space and then your token.

        Example: `apikey 1234$abcdef`
      type: apiKey
      in: header
      name: Authorization

````