> ## Documentation Index
> Fetch the complete documentation index at: https://gcore.com/docs/llms.txt
> Use this file to discover all available pages before exploring further.

# Get inference deployment



## OpenAPI

````yaml /api-reference/services_documented/cloud_api.yaml get /cloud/v3/inference/{project_id}/deployments/{deployment_name}
# OpenAPI document header: spec version and API metadata.
openapi: 3.1.0
info:
  title: Gcore OpenAPI – Cloud API
  description: >-
    This OpenAPI is an aggregated OpenAPI specification that unifies all Gcore
    products into a single file. It covers Cloud, CDN, DNS, WAAP, DDoS
    Protection, Object Storage, Streaming, and FastEdge services.
  # The version string is an ISO 8601 timestamp, not a semantic version.
  version: '2026-05-07T20:33:46.548242+00:00'
# Single base URL for the operations in this document.
servers:
  - url: https://api.gcore.com
# Document-level security requirement: the `APIKey` scheme declared under
# components.securitySchemes.
security:
  - APIKey: []
# Tag catalogue of the aggregated Cloud API. Each tag's `x-displayName`
# repeats its `name`. The operation documented below is tagged
# `Everywhere Inference`.
tags:
  - name: Bare Metal
    x-displayName: Bare Metal
  - name: Container as a Service
    x-displayName: Container as a Service
  - name: Cost Reports
    x-displayName: Cost Reports
  - name: DDoS Protection
    x-displayName: DDoS Protection
  - name: Everywhere Inference
    x-displayName: Everywhere Inference
  - name: Everywhere Inference Apps
    x-displayName: Everywhere Inference Apps
  - name: File Shares
    x-displayName: File Shares
  - name: Floating IPs
    x-displayName: Floating IPs
  - name: Function as a Service
    x-displayName: Function as a Service
  - name: GPU Bare Metal
    x-displayName: GPU Bare Metal
  - name: GPU Virtual
    x-displayName: GPU Virtual
  - name: IP Ranges
    x-displayName: IP Ranges
  - name: Images
    x-displayName: Images
  - name: Instances
    x-displayName: Instances
  - name: Load Balancers
    x-displayName: Load Balancers
  - name: Logging
    x-displayName: Logging
  - name: Managed Kubernetes
    x-displayName: Managed Kubernetes
  - name: Managed PostgreSQL
    x-displayName: Managed PostgreSQL
  - name: Networks
    x-displayName: Networks
  - name: Placement Groups
    x-displayName: Placement Groups
  - name: Projects
    x-displayName: Projects
  - name: Quotas
    x-displayName: Quotas
  - name: Regions
    x-displayName: Regions
  - name: Registry
    x-displayName: Registry
  - name: Reservations
    x-displayName: Reservations
  - name: Reserved IPs
    x-displayName: Reserved IPs
  - name: Routers
    x-displayName: Routers
  - name: SSH Keys
    x-displayName: SSH Keys
  - name: Secrets
    x-displayName: Secrets
  - name: Security Groups
    x-displayName: Security Groups
  - name: Snapshot Schedules
    x-displayName: Snapshot Schedules
  - name: Snapshots
    x-displayName: Snapshots
  - name: Tasks
    x-displayName: Tasks
  - name: User Actions
    x-displayName: User Actions
  - name: User Role Assignments
    x-displayName: User Role Assignments
  - name: Volumes
    x-displayName: Volumes
paths:
  # Fetch one inference deployment, addressed by project ID and deployment
  # name.
  /cloud/v3/inference/{project_id}/deployments/{deployment_name}:
    get:
      tags:
        - Everywhere Inference
      summary: Get inference deployment
      operationId: InferenceInstanceHandlerV3.get
      # Both path parameters are required.
      parameters:
        - in: path
          name: project_id
          required: true
          description: Project ID
          schema:
            description: Project ID
            example: 1
            examples:
              - 1
            title: Project Id
            type: integer
        - in: path
          name: deployment_name
          required: true
          description: Inference instance name.
          schema:
            description: Inference instance name.
            example: my-instance
            examples:
              - my-instance
            title: Deployment Name
            type: string
      # Only the success response is declared; no error responses are listed.
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/InferenceInstanceOutSerializerV3'
      # SDK usage samples for Python and Go.
      x-codeSamples:
        - lang: Python
          source: |-
            import os
            from gcore import Gcore

            client = Gcore(
                api_key=os.environ.get("GCORE_API_KEY"),  # This is the default and can be omitted
            )
            inference_deployment = client.cloud.inference.deployments.get(
                deployment_name="my-instance",
                project_id=1,
            )
            print(inference_deployment.project_id)
        - lang: Go
          source: "package main\n\nimport (\n\t\"context\"\n\t\"fmt\"\n\n\t\"github.com/G-Core/gcore-go\"\n\t\"github.com/G-Core/gcore-go/cloud\"\n\t\"github.com/G-Core/gcore-go/option\"\n)\n\nfunc main() {\n\tclient := gcore.NewClient(\n\t\toption.WithAPIKey(\"My API Key\"),\n\t)\n\tinferenceDeployment, err := client.Cloud.Inference.Deployments.Get(\n\t\tcontext.TODO(),\n\t\t\"my-instance\",\n\t\tcloud.InferenceDeploymentGetParams{\n\t\t\tProjectID: gcore.Int(1),\n\t\t},\n\t)\n\tif err != nil {\n\t\tpanic(err.Error())\n\t}\n\tfmt.Printf(\"%+v\\n\", inferenceDeployment.ProjectID)\n}\n"
components:
  schemas:
    # Response body of the "Get inference deployment" operation. Every property
    # except `api_keys` is listed under `required`.
    InferenceInstanceOutSerializerV3:
      properties:
        address:
          anyOf:
            - format: uri
              minLength: 1
              type: string
            - type: 'null'
          description: Address of the inference instance
          examples:
            - https://example.com
          title: Address
        api_keys:
          anyOf:
            - items:
                type: string
              type: array
            - type: 'null'
          default: null
          description: List of API keys for the inference instance
          examples:
            - - key1
              - key2
          title: Api Keys
        # Marked deprecated; the description does not name a replacement field.
        auth_enabled:
          deprecated: true
          description: >-
            `true` if instance uses API key authentication. `"Authorization":
            "Bearer *****"` or `"X-Api-Key": "*****"` header is required for the
            requests to the instance if enabled.
          example: false
          examples:
            - false
          title: Auth Enabled
          type: boolean
        # NOTE(review): the schema declares `string | null`, so the example is
        # a single command-line string rather than a list. Confirm the
        # serializer does not actually emit a list of arguments.
        command:
          anyOf:
            - type: string
            - type: 'null'
          description: Command to be executed when running a container from an image.
          examples:
            - "nginx -g 'daemon off;'"
          title: Command
        # NOTE(review): the examples below omit fields that
        # ContainerOutSerializerV3 and its nested schemas list as required
        # (`address`, `error_message`, `scale.polling_interval`, and the
        # `gpu_memory`/`gpu_utilization`/`http`/`sqs` triggers).
        containers:
          description: List of containers for the inference instance
          example:
            - deploy_status:
                ready: 1
                total: 3
              region_id: 1
              scale:
                cooldown_period: 60
                max: 3
                min: 1
                triggers:
                  cpu:
                    threshold: 80
                  memory:
                    threshold: 70
          examples:
            - - deploy_status:
                  ready: 1
                  total: 3
                region_id: 1
                scale:
                  cooldown_period: 60
                  max: 3
                  min: 1
                  triggers:
                    cpu:
                      threshold: 80
                    memory:
                      threshold: 70
          items:
            $ref: '#/components/schemas/ContainerOutSerializerV3'
          title: Containers
          type: array
        created_at:
          anyOf:
            - type: string
            - type: 'null'
          description: Inference instance creation date in ISO 8601 format.
          examples:
            - '2023-08-22T11:21:00Z'
          title: Created At
        credentials_name:
          description: Registry credentials name
          example: dockerhub
          examples:
            - dockerhub
          title: Credentials Name
          type: string
        description:
          description: Inference instance description.
          example: My first instance
          examples:
            - My first instance
          title: Description
          type: string
        envs:
          anyOf:
            - additionalProperties:
                type: string
              type: object
            - type: 'null'
          description: Environment variables for the inference instance
          examples:
            - DEBUG_MODE: 'False'
              KEY: '12345'
          title: Envs
        flavor_name:
          description: Flavor name for the inference instance
          example: inference-16vcpu-232gib-1xh100-80gb
          examples:
            - inference-16vcpu-232gib-1xh100-80gb
          title: Flavor Name
          type: string
        image:
          description: >-
            Docker image for the inference instance. This field should contain
            the image name and tag in the format 'name:tag', e.g.,
            'nginx:latest'. It defaults to Docker Hub as the image registry, but
            any accessible Docker image URL can be specified.
          example: nginx:latest
          examples:
            - nginx:latest
          title: Image
          type: string
        ingress_opts:
          anyOf:
            - $ref: '#/components/schemas/IngressOptsOutSerializer'
            - type: 'null'
          description: Ingress options for the inference instance
          examples:
            - disable_response_buffering: true
        listening_port:
          description: Listening port for the inference instance.
          example: 8080
          examples:
            - 8080
          title: Listening Port
          type: integer
        logging:
          anyOf:
            - $ref: '#/components/schemas/LoggingOutSerializer'
            - type: 'null'
          description: Logging configuration for the inference instance
          examples:
            - destination_region_id: 1
              enabled: true
              retention_policy:
                period: 45
              topic_name: my-log-name
        name:
          description: Inference instance name.
          example: my-instance
          examples:
            - my-instance
          title: Name
          type: string
        object_references:
          description: Indicates to which parent object this inference belongs to.
          items:
            $ref: '#/components/schemas/InferenceObjectReferenceSerializer'
          title: Object References
          type: array
        probes:
          anyOf:
            - $ref: '#/components/schemas/InferenceInstanceProbesOutSerializerV2'
            - type: 'null'
          description: Probes configured for all containers of the inference instance.
        # NOTE(review): "If not provided..." reads like request-side wording;
        # in this response schema the field is listed under `required`.
        project_id:
          description: Project ID. If not provided, your default project ID will be used.
          example: 1
          examples:
            - 1
          title: Project Id
          type: integer
        status:
          $ref: '#/components/schemas/InferenceInstanceStatusV3'
          description: >-
            Inference instance status.


            Value can be one of the following:

            - `DEPLOYING` - The instance is being deployed. Containers are not
            yet created.

            - `PARTIALLYDEPLOYED` - All containers have been created, but some
            may not be ready yet. Instances stuck in this state typically
            indicate either image being pulled, or a failure of some kind. In
            the latter case, the `error_message` field of the respective
            container object in the `containers` collection explains the failure
            reason.

            - `ACTIVE` - The instance is running and ready to accept requests.

            - `DISABLED` - The instance is disabled and not accepting any
            requests.

            - `PENDING` - The instance is running but scaled to zero. It will be
            automatically scaled up when a request is made.

            - `DELETING` - The instance is being deleted.
          examples:
            - ACTIVE
        timeout:
          anyOf:
            - minimum: 0
              type: integer
            - type: 'null'
          description: >-
            Specifies the duration in seconds without any requests after which
            the containers will be downscaled to their minimum scale value as
            defined by `scale.min`. If set, this helps in optimizing resource
            usage by reducing the number of container instances during periods
            of inactivity.
          examples:
            - 120
          title: Timeout
      required:
        - project_id
        - name
        - description
        - image
        - listening_port
        - created_at
        - status
        - auth_enabled
        - address
        - containers
        - timeout
        - envs
        - flavor_name
        - command
        - credentials_name
        - logging
        - probes
        - ingress_opts
        - object_references
      title: InferenceInstanceOutSerializerV3
      type: object
    # One item of a deployment's `containers` array: region, address, scale
    # settings, deploy status and error message. All five fields are required;
    # `address` and `error_message` may be null.
    ContainerOutSerializerV3:
      properties:
        # NOTE(review): the description says "inference instance" although this
        # schema describes a container; confirm the intended wording.
        address:
          anyOf:
            - format: uri
              minLength: 1
              type: string
            - type: 'null'
          description: Address of the inference instance
          examples:
            - https://example.com
          title: Address
        deploy_status:
          $ref: '#/components/schemas/DeployStatusSerializer'
          description: Status of the containers deployment
          examples:
            - ready: 1
              total: 3
        error_message:
          anyOf:
            - type: string
            - type: 'null'
          description: Error message if the container deployment failed
          examples:
            - Failed to pull image
            - No capacity available in the region
          title: Error Message
        # The value is an integer identifier, so it is described as an ID
        # rather than a name.
        region_id:
          description: Region ID for the container
          example: 1
          examples:
            - 1
          title: Region Id
          type: integer
        scale:
          $ref: '#/components/schemas/ContainerScaleOutSerializerV3'
          description: Scale for the container
          examples:
            - cooldown_period: 60
              max: 3
              min: 1
              triggers:
                cpu:
                  threshold: 80
                memory:
                  threshold: 70
      required:
        - region_id
        - address
        - scale
        - deploy_status
        - error_message
      title: ContainerOutSerializerV3
      type: object
    # Ingress options of a deployment; the single boolean flag is required.
    IngressOptsOutSerializer:
      properties:
        disable_response_buffering:
          description: >-
            Disable response buffering if true. A client usually has a much
            slower connection and can not consume the response data as fast as
            it is produced by an upstream application. Ingress tries to buffer
            the whole response in order to release the upstream application as
            soon as possible. By default, the response buffering is enabled.
          example: true
          examples:
            - true
            - false
          title: Disable Response Buffering
          type: boolean
      required:
        - disable_response_buffering
      title: IngressOptsOutSerializer
      type: object
    # Logging settings of a deployment. `retention_policy` is optional and
    # defaults to null; the other three fields are required, and
    # `destination_region_id` and `topic_name` may be null.
    LoggingOutSerializer:
      properties:
        destination_region_id:
          anyOf:
            - type: integer
            - type: 'null'
          description: ID of the region in which the logs will be stored
          examples:
            - 1
          title: Destination Region Id
        enabled:
          description: Indicates if log streaming is enabled or disabled
          example: true
          examples:
            - true
            - false
          title: Enabled
          type: boolean
        retention_policy:
          anyOf:
            - $ref: '#/components/schemas/LaasIndexRetentionPolicyPydanticSerializer'
            - type: 'null'
          default: null
          description: Logs retention policy
          examples:
            - period: 45
        topic_name:
          anyOf:
            - type: string
            - type: 'null'
          description: The topic name to stream logs to
          examples:
            - my-log-name
          title: Topic Name
      required:
        - enabled
        - destination_region_id
        - topic_name
      title: LoggingOutSerializer
      type: object
    # Reference to another inference object, identified by `kind` and `name`
    # (both required).
    InferenceObjectReferenceSerializer:
      properties:
        kind:
          $ref: '#/components/schemas/InferenceKind'
          description: Kind of the inference object to be referenced
          examples:
            - AppDeployment
        name:
          description: Name of the inference object to be referenced
          example: my-inference-app
          examples:
            - my-inference-app
          title: Name
          type: string
      required:
        - name
        - kind
      title: InferenceObjectReferenceSerializer
      type: object
    # Liveness, readiness and startup probe settings. All three keys are
    # required, each may be null, and no other properties are allowed.
    InferenceInstanceProbesOutSerializerV2:
      additionalProperties: false
      properties:
        liveness_probe:
          anyOf:
            - $ref: >-
                #/components/schemas/InferenceInstanceContainerProbeConfigurationOutSerializerV2
            - type: 'null'
          description: Liveness probe configuration
        readiness_probe:
          anyOf:
            - $ref: >-
                #/components/schemas/InferenceInstanceContainerProbeConfigurationOutSerializerV2
            - type: 'null'
          description: Readiness probe configuration
        startup_probe:
          anyOf:
            - $ref: >-
                #/components/schemas/InferenceInstanceContainerProbeConfigurationOutSerializerV2
            - type: 'null'
          description: Startup probe configuration
      required:
        - liveness_probe
        - readiness_probe
        - startup_probe
      title: InferenceInstanceProbesOutSerializerV2
      type: object
    # Status values of an inference deployment. Each value is explained in the
    # `status` description of InferenceInstanceOutSerializerV3.
    InferenceInstanceStatusV3:
      enum:
        - ACTIVE
        - DELETING
        - DEPLOYING
        - DISABLED
        - PARTIALLYDEPLOYED
        - PENDING
      title: InferenceInstanceStatusV3
      type: string
    # Counts of ready and total instances; both fields are required.
    DeployStatusSerializer:
      properties:
        ready:
          description: Number of ready instances
          example: 1
          examples:
            - 1
          title: Ready
          type: integer
        total:
          description: Total number of instances
          example: 2
          examples:
            - 2
          title: Total
          type: integer
      required:
        - total
        - ready
      title: DeployStatusSerializer
      type: object
    # Scale settings of a container: min/max scale, timing values in seconds,
    # and scaling triggers. All five fields are required; `cooldown_period`
    # and `polling_interval` may be null.
    ContainerScaleOutSerializerV3:
      properties:
        cooldown_period:
          anyOf:
            - type: integer
            - type: 'null'
          description: Cooldown period between scaling actions in seconds
          examples:
            - 60
          title: Cooldown Period
        max:
          description: Maximum scale for the container
          example: 3
          examples:
            - 3
          title: Max
          type: integer
        min:
          description: Minimum scale for the container
          example: 1
          examples:
            - 1
          title: Min
          type: integer
        polling_interval:
          anyOf:
            - type: integer
            - type: 'null'
          description: Polling interval for scaling triggers in seconds
          examples:
            - 30
          title: Polling Interval
        triggers:
          $ref: '#/components/schemas/ContainerScaleTriggersOutSerializer'
          description: Triggers for scaling actions
          examples:
            - cpu:
                threshold: 75
      required:
        - min
        - max
        - cooldown_period
        - polling_interval
        - triggers
      title: ContainerScaleOutSerializerV3
      type: object
    # Log retention policy. `period` is required and is either null or an
    # integer greater than 0 and at most 1825.
    LaasIndexRetentionPolicyPydanticSerializer:
      properties:
        period:
          anyOf:
            - exclusiveMinimum: 0
              maximum: 1825
              type: integer
            - type: 'null'
          description: Number of days for which logs must be kept.
          examples:
            - 45
          title: Period
      required:
        - period
      title: LaasIndexRetentionPolicyPydanticSerializer
      type: object
    # Kinds of inference object that can be referenced; the enum currently
    # holds a single value.
    InferenceKind:
      enum:
        - AppDeployment
      title: InferenceKind
      type: string
    # A single probe slot: an `enabled` flag plus the probe settings, which may
    # be null. Both fields are required.
    InferenceInstanceContainerProbeConfigurationOutSerializerV2:
      properties:
        enabled:
          description: Whether the probe is enabled or not.
          example: true
          examples:
            - true
            - false
          title: Enabled
          type: boolean
        probe:
          anyOf:
            - $ref: '#/components/schemas/ContainerProbeOutSerializerV2'
            - type: 'null'
          description: Probe configuration (`exec`, `http_get` or `tcp_socket`)
      required:
        - enabled
        - probe
      title: InferenceInstanceContainerProbeConfigurationOutSerializerV2
      type: object
    # Scaling triggers of a container. All six keys are required, and each one
    # may be null.
    ContainerScaleTriggersOutSerializer:
      properties:
        cpu:
          anyOf:
            - $ref: >-
                #/components/schemas/ContainerScaleTriggersThresholdOutSerializer
            - type: 'null'
          description: CPU trigger configuration
          examples:
            - threshold: 80
        gpu_memory:
          anyOf:
            - $ref: >-
                #/components/schemas/ContainerScaleTriggersThresholdOutSerializer
            - type: 'null'
          description: >-
            GPU memory trigger configuration. Calculated by
            `DCGM_FI_DEV_MEM_COPY_UTIL` metric
          examples:
            - threshold: 80
        gpu_utilization:
          anyOf:
            - $ref: >-
                #/components/schemas/ContainerScaleTriggersThresholdOutSerializer
            - type: 'null'
          description: >-
            GPU utilization trigger configuration. Calculated by
            `DCGM_FI_DEV_GPU_UTIL` metric
          examples:
            - threshold: 80
        http:
          anyOf:
            - $ref: '#/components/schemas/ContainerScaleTriggersRateOutSerializer'
            - type: 'null'
          description: HTTP trigger configuration
          examples:
            - rate: 1
              window: 60
        memory:
          anyOf:
            - $ref: >-
                #/components/schemas/ContainerScaleTriggersThresholdOutSerializer
            - type: 'null'
          description: Memory trigger configuration
          examples:
            - threshold: 80
        sqs:
          anyOf:
            - $ref: '#/components/schemas/ContainerScaleTriggersSqsOutSerializer'
            - type: 'null'
          description: SQS trigger configuration
      required:
        - cpu
        - memory
        - gpu_utilization
        - gpu_memory
        - http
        - sqs
      title: ContainerScaleTriggersOutSerializer
      type: object
    # Probe settings: thresholds, timing values in seconds, and the three probe
    # mechanisms (`exec`, `http_get`, `tcp_socket`), each of which may be null.
    # All eight fields are required.
    ContainerProbeOutSerializerV2:
      properties:
        exec:
          anyOf:
            - $ref: '#/components/schemas/ContainerProbeExecConfigOutSerializerV2'
            - type: 'null'
          description: Exec probe configuration
        failure_threshold:
          description: >-
            The number of consecutive probe failures that mark the container as
            unhealthy.
          example: 3
          examples:
            - 3
          title: Failure Threshold
          type: integer
        http_get:
          anyOf:
            - $ref: '#/components/schemas/ContainerProbeHttpGetConfigOutSerializerV2'
            - type: 'null'
          description: HTTP GET probe configuration
        # Unit stated explicitly, matching the wording used for
        # `period_seconds`.
        initial_delay_seconds:
          description: The initial delay (in seconds) before starting the first probe.
          example: 0
          examples:
            - 0
            - 10
          title: Initial Delay Seconds
          type: integer
        period_seconds:
          description: How often (in seconds) to perform the probe.
          example: 5
          examples:
            - 5
            - 10
          title: Period Seconds
          type: integer
        success_threshold:
          description: >-
            The number of consecutive successful probes that mark the container
            as healthy.
          example: 1
          examples:
            - 1
          title: Success Threshold
          type: integer
        tcp_socket:
          anyOf:
            - $ref: >-
                #/components/schemas/ContainerProbeTcpSocketConfigOutSerializerV2
            - type: 'null'
          description: TCP socket probe configuration
        # Unit stated explicitly, matching the wording used for
        # `period_seconds`.
        timeout_seconds:
          description: The timeout (in seconds) for each probe.
          example: 1
          examples:
            - 1
            - 5
          title: Timeout Seconds
          type: integer
      required:
        - failure_threshold
        - initial_delay_seconds
        - period_seconds
        - timeout_seconds
        - success_threshold
        - exec
        - tcp_socket
        - http_get
      title: ContainerProbeOutSerializerV2
      type: object
    # Percentage threshold used by the `cpu`, `memory`, `gpu_memory` and
    # `gpu_utilization` triggers of ContainerScaleTriggersOutSerializer.
    ContainerScaleTriggersThresholdOutSerializer:
      properties:
        threshold:
          description: Threshold value for the trigger in percentage
          example: 75
          examples:
            - 75
          title: Threshold
          type: integer
      required:
        - threshold
      title: ContainerScaleTriggersThresholdOutSerializer
      type: object
    # HTTP trigger settings: `rate` requests per `window` seconds. Both fields
    # are required.
    ContainerScaleTriggersRateOutSerializer:
      properties:
        rate:
          description: Request count per 'window' seconds for the http trigger
          example: 1
          examples:
            - 1
          title: Rate
          type: integer
        window:
          description: Time window for rate calculation in seconds
          example: 60
          examples:
            - 60
          title: Window
          type: integer
      required:
        - rate
        - window
      title: ContainerScaleTriggersRateOutSerializer
      type: object
    # SQS trigger settings. All eight fields are required; only `aws_endpoint`
    # may be null.
    ContainerScaleTriggersSqsOutSerializer:
      properties:
        activation_queue_length:
          description: Number of messages for activation
          example: 5
          examples:
            - 5
          title: Activation Queue Length
          type: integer
        aws_endpoint:
          anyOf:
            - type: string
            - type: 'null'
          description: Custom AWS endpoint
          title: Aws Endpoint
        aws_region:
          description: AWS region
          example: us-east-1
          examples:
            - us-east-1
          title: Aws Region
          type: string
        queue_length:
          description: Number of messages for one replica
          example: 10
          examples:
            - 10
          title: Queue Length
          type: integer
        queue_url:
          description: SQS queue URL
          example: https://sqs.us-east-1.amazonaws.com/123456789012/MyQueue
          examples:
            - https://sqs.us-east-1.amazonaws.com/123456789012/MyQueue
          title: Queue Url
          type: string
        scale_on_delayed:
          description: Scale on delayed messages
          example: true
          examples:
            - true
            - false
          title: Scale On Delayed
          type: boolean
        scale_on_flight:
          description: Scale on in-flight messages
          example: true
          examples:
            - true
            - false
          title: Scale On Flight
          type: boolean
        secret_name:
          description: Auth secret name
          title: Secret Name
          type: string
      required:
        - queue_url
        - queue_length
        - activation_queue_length
        - scale_on_flight
        - scale_on_delayed
        - aws_region
        - aws_endpoint
        - secret_name
      title: ContainerScaleTriggersSqsOutSerializer
      type: object
    # Exec probe settings: the command, given as a list of strings, to run
    # inside the container.
    ContainerProbeExecConfigOutSerializerV2:
      properties:
        command:
          description: Command to be executed inside the running container.
          example:
            - ls
            - '-l'
          examples:
            - - ls
              - '-l'
          items:
            type: string
          title: Command
          type: array
      required:
        - command
      title: ContainerProbeExecConfigOutSerializerV2
      type: object
    # HTTP GET probe settings. All five fields are required; only `host` may
    # be null.
    ContainerProbeHttpGetConfigOutSerializerV2:
      properties:
        headers:
          additionalProperties:
            type: string
          description: HTTP headers to be sent with the request.
          example:
            Authorization: Bearer token 123
          examples:
            - Authorization: Bearer token 123
          title: Headers
          type: object
        host:
          anyOf:
            - type: string
            - type: 'null'
          description: Host name to send HTTP request to.
          examples:
            - 127.0.0.1
          title: Host
        path:
          description: The endpoint to send the HTTP request to.
          example: /healthz
          examples:
            - /healthz
            - /readiness
          title: Path
          type: string
        port:
          description: Port number the probe should connect to.
          example: 80
          examples:
            - 80
            - 8080
          title: Port
          type: integer
        # NOTE(review): the examples (`HTTP`, `HTTPS`) suggest this is the URL
        # scheme; the property name `schema` is part of the API contract, so
        # only the description wording could change. Confirm before editing.
        schema:
          description: Schema to use for the HTTP request.
          example: HTTP
          examples:
            - HTTP
            - HTTPS
          title: Schema
          type: string
      required:
        - headers
        - host
        - path
        - port
        - schema
      title: ContainerProbeHttpGetConfigOutSerializerV2
      type: object
    # TCP socket probe settings: the single required field is the port to
    # check.
    ContainerProbeTcpSocketConfigOutSerializerV2:
      properties:
        port:
          description: Port number to check if it's open.
          example: 80
          examples:
            - 80
            - 8080
          title: Port
          type: integer
      required:
        - port
      title: ContainerProbeTcpSocketConfigOutSerializerV2
      type: object
  # API key authentication: the key is sent in the `Authorization` header,
  # prefixed with the word `apikey` and a space.
  securitySchemes:
    APIKey:
      description: >-
        API key for authentication. Make sure to include the word `apikey`,
        followed by a single space and then your token.

        Example: `apikey 1234$abcdef`
      type: apiKey
      in: header
      name: Authorization

````