Serverless and Agent Inference endpoints #1146
@@ -0,0 +1,58 @@

```yaml
operationId: agentInference_create_chat_completion
summary: Create a model response for the given chat conversation
description: |-
  Creates a model response for the given chat conversation via a customer-provisioned
  agent endpoint.
tags:
  - Agent Inference
servers:
  - url: "https://{agent_url}"
    description: production
    variables:
      agent_url:
        default: "{your-agent-url}"
        description: The agent URL assigned to your provisioned agent (e.g. fuauiziwb5xm6xka4c7aer5k.agents.do-ai.run).
parameters:
  - name: agent
    in: query
    required: true
    schema:
      type: boolean
      default: true
    description: Must be set to true for agent-based completion behavior.
    example: true
requestBody:
  required: true
  content:
    application/json:
      schema:
        $ref: "models/chat_completion_request.yml"
responses:
  "200":
    description: Successful chat completion. When stream is true, the response is sent as Server-Sent Events (text/event-stream); otherwise a single JSON object (application/json) is returned.
    headers:
      ratelimit-limit:
        $ref: '../../shared/headers.yml#/ratelimit-limit'
      ratelimit-remaining:
        $ref: '../../shared/headers.yml#/ratelimit-remaining'
      ratelimit-reset:
        $ref: '../../shared/headers.yml#/ratelimit-reset'
    content:
      application/json:
        schema:
          $ref: "models/chat_completion_response.yml"
      text/event-stream:
        schema:
          $ref: "models/chat_completion_chunk.yml"
  "401":
    $ref: '../../shared/responses/unauthorized.yml'
  "429":
    $ref: '../../shared/responses/too_many_requests.yml'
  "500":
    $ref: '../../shared/responses/server_error.yml'
  default:
    $ref: '../../shared/responses/unexpected_error.yml'
x-codeSamples:
  - $ref: 'examples/curl/agentInference_create_chat_completion.yml'
security:
  - bearer_auth: []
```
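The spec above requires both the `agent=true` query parameter and a bearer token. A minimal sketch of assembling that request in Python (the helper name `build_agent_request` and the sample agent URL are illustrative; the path and the ignored `model` field mirror the cURL sample in this PR):

```python
import json
from urllib.parse import urlencode

def build_agent_request(agent_url: str, access_key: str, messages: list) -> tuple:
    """Assemble the URL, headers, and body for an agent chat-completions call.

    The spec requires the `agent=true` query parameter and bearer auth;
    the path /api/v1/chat/completions matches the cURL sample in this PR.
    """
    query = urlencode({"agent": "true"})
    url = f"https://{agent_url}/api/v1/chat/completions?{query}"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {access_key}",
    }
    # The `model` field is accepted but ignored for agent endpoints,
    # per the cURL sample ("model": "ignored").
    body = json.dumps({"messages": messages, "model": "ignored"})
    return url, headers, body

url, headers, body = build_agent_request(
    "example.agents.do-ai.run",  # hypothetical agent URL
    "MY_KEY",
    [{"role": "user", "content": "What is the capital of Portugal?"}],
)
```

Pass `url`, `headers`, and `body` to any HTTP client; this only shows how the pieces fit together.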
@@ -0,0 +1,7 @@

```yaml
lang: cURL
source: |-
  curl -X POST \
    -H "Content-Type: application/json" \
    -H "Authorization: Bearer $AGENT_ACCESS_KEY" \
    -d '{"messages": [{"role": "user", "content": "What is the capital of Portugal?"}], "model": "ignored"}' \
    "https://$AGENT_URL/api/v1/chat/completions?agent=true"
```
@@ -0,0 +1,12 @@

```yaml
lang: cURL
source: |-
  curl -X POST \
    -H "Authorization: Bearer $MODEL_ACCESS_KEY" \
    -H "Content-Type: application/json" \
    -d '{
      "model_id": "fal-ai/flux/schnell",
      "input": {
        "prompt": "A futuristic city at sunset"
      }
    }' \
    "https://inference.do-ai.run/v1/async-invoke"
```
@@ -0,0 +1,17 @@

```yaml
lang: cURL
label: Generate Audio
source: |-
  curl -X POST \
    -H "Authorization: Bearer $MODEL_ACCESS_KEY" \
    -H "Content-Type: application/json" \
    -d '{
      "model_id": "fal-ai/stable-audio-25/text-to-audio",
      "input": {
        "prompt": "Techno song with futuristic sounds",
        "seconds_total": 60
      },
      "tags": [
        {"key": "type", "value": "test"}
      ]
    }' \
    "https://inference.do-ai.run/v1/async-invoke"
```
@@ -0,0 +1,16 @@

> **Contributor:** is this file required?

```yaml
lang: cURL
label: Text-to-Speech
source: |-
  curl -X POST \
    -H "Authorization: Bearer $MODEL_ACCESS_KEY" \
    -H "Content-Type: application/json" \
    -d '{
      "model_id": "fal-ai/elevenlabs/tts/multilingual-v2",
      "input": {
        "text": "This text-to-speech example uses DigitalOcean multilingual voice."
      },
      "tags": [
        {"key": "type", "value": "test"}
      ]
    }' \
    "https://inference.do-ai.run/v1/async-invoke"
```
@@ -0,0 +1,7 @@

```yaml
lang: cURL
source: |-
  curl -X POST \
    -H "Content-Type: application/json" \
    -H "Authorization: Bearer $MODEL_ACCESS_KEY" \
    -d '{"messages": [{"role": "user", "content": "What is the capital of Portugal?"}], "model": "meta-llama/Meta-Llama-3.1-8B-Instruct"}' \
    "https://inference.do-ai.run/v1/chat/completions"
```
@@ -0,0 +1,7 @@

```yaml
lang: cURL
source: |-
  curl -X POST \
    -H "Content-Type: application/json" \
    -H "Authorization: Bearer $MODEL_ACCESS_KEY" \
    -d '{"prompt": "A cute baby sea otter floating on its back in calm blue water", "model": "openai-gpt-image-1", "size": "auto", "quality": "auto"}' \
    "https://inference.do-ai.run/v1/images/generations"
```
@@ -0,0 +1,13 @@

```yaml
lang: cURL
source: |-
  curl -sS -X POST \
    -H "Authorization: Bearer $MODEL_ACCESS_KEY" \
    -H "Content-Type: application/json" \
    -d '{
      "model": "openai-gpt-oss-20b",
      "input": "What is the capital of France?",
      "max_output_tokens": 50,
      "temperature": 0.7,
      "stream": false
    }' \
    "https://inference.do-ai.run/v1/responses"
```
@@ -0,0 +1,6 @@

> **Author:** This endpoint is available under serverless inference https://docs.digitalocean.com/products/gradient-ai-platform/how-to/use-serverless-inference/#retrieve-available-models-curl

```yaml
lang: cURL
source: |-
  curl -X GET \
    -H "Content-Type: application/json" \
    -H "Authorization: Bearer $MODEL_ACCESS_KEY" \
    "https://inference.do-ai.run/v1/models"
```
@@ -0,0 +1,66 @@

```yaml
operationId: inference_create_async_invoke
summary: Generate Image, Audio, or Text-to-Speech Using fal Models
description: >
  Generate Image, Audio, or Text-to-Speech Using fal Models. This endpoint
  starts an asynchronous job and returns a request_id. The job status is
  QUEUED initially. Use the request_id to poll for the result.
tags:
  - Serverless Inference
servers:
  - url: "https://inference.do-ai.run"
    description: production
requestBody:
  required: true
  content:
    application/json:
      schema:
        $ref: "models/async_invoke_request.yml"
      examples:
        Image Generation:
          value:
            model_id: "fal-ai/flux/schnell"
            input:
              prompt: "A futuristic city at sunset"
        Generate Audio:
          value:
            model_id: "fal-ai/stable-audio-25/text-to-audio"
            input:
              prompt: "Techno song with futuristic sounds"
              seconds_total: 60
            tags:
              - key: "type"
                value: "test"
        Text-to-Speech:
          value:
            model_id: "fal-ai/elevenlabs/tts/multilingual-v2"
            input:
              text: "This text-to-speech example uses DigitalOcean multilingual voice."
            tags:
              - key: "type"
                value: "test"
responses:
  "202":
    description: The async invocation request was accepted.
    headers:
      ratelimit-limit:
        $ref: '../../shared/headers.yml#/ratelimit-limit'
      ratelimit-remaining:
        $ref: '../../shared/headers.yml#/ratelimit-remaining'
      ratelimit-reset:
        $ref: '../../shared/headers.yml#/ratelimit-reset'
    content:
      application/json:
        schema:
          $ref: "models/async_invoke_response.yml"
  "401":
    $ref: '../../shared/responses/unauthorized.yml'
  "429":
    $ref: '../../shared/responses/too_many_requests.yml'
  "500":
    $ref: '../../shared/responses/server_error.yml'
  default:
    $ref: '../../shared/responses/unexpected_error.yml'
x-codeSamples:
  - $ref: 'examples/curl/inference_async_invoke.yml'
security:
  - bearer_auth: []
```
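The description above says to poll with the returned `request_id`; only the initial QUEUED state is documented. A minimal polling-loop sketch, with the non-initial status names (`IN_PROGRESS`, `COMPLETED`) and the status-fetching function supplied by the caller as stated assumptions:

```python
import time

def poll_async_invoke(get_status, request_id: str,
                      interval: float = 2.0, max_attempts: int = 30):
    """Poll an async-invoke job until it leaves the queued/running states.

    `get_status` is a caller-supplied function (e.g. wrapping a GET on the
    job-status endpoint); the IN_PROGRESS and COMPLETED status names are
    assumptions for illustration -- only QUEUED is documented above.
    """
    for _ in range(max_attempts):
        result = get_status(request_id)
        if result["status"] not in ("QUEUED", "IN_PROGRESS"):
            return result
        time.sleep(interval)
    raise TimeoutError(f"job {request_id} did not finish in time")

# Stubbed status source standing in for the real HTTP call:
states = iter([
    {"status": "QUEUED"},
    {"status": "IN_PROGRESS"},
    {"status": "COMPLETED", "output": {}},
])
final = poll_async_invoke(lambda rid: next(states), "req-123", interval=0.0)
```

In real use, `get_status` would issue an authenticated GET against the job-status endpoint and parse the JSON body.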
@@ -0,0 +1,44 @@

```yaml
operationId: inference_create_chat_completion
summary: Create a model response for the given chat conversation
description: Creates a model response for the given chat conversation.
tags:
  - Serverless Inference
servers:
  - url: "https://inference.do-ai.run"
    description: production
x-inference-base-url: "https://inference.do-ai.run"
requestBody:
  required: true
  content:
    application/json:
      schema:
        $ref: "models/chat_completion_request.yml"
responses:
  "200":
    description: Successful chat completion. When stream is true, the response is sent as Server-Sent Events (text/event-stream); otherwise a single JSON object (application/json) is returned.
    headers:
      ratelimit-limit:
        $ref: '../../shared/headers.yml#/ratelimit-limit'
      ratelimit-remaining:
        $ref: '../../shared/headers.yml#/ratelimit-remaining'
      ratelimit-reset:
        $ref: '../../shared/headers.yml#/ratelimit-reset'
    content:
      application/json:
        schema:
          $ref: "models/chat_completion_response.yml"
      text/event-stream:
        schema:
          $ref: "models/chat_completion_chunk.yml"
  "401":
    $ref: '../../shared/responses/unauthorized.yml'
  "429":
    $ref: '../../shared/responses/too_many_requests.yml'
  "500":
    $ref: '../../shared/responses/server_error.yml'
  default:
    $ref: '../../shared/responses/unexpected_error.yml'
x-codeSamples:
  - $ref: 'examples/curl/inference_create_chat_completion.yml'
security:
  - bearer_auth: []
```
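When `stream` is true, the 200 response is a text/event-stream of chat-completion chunks. A sketch of parsing that stream, assuming the common OpenAI-style framing (`data: {...}` lines terminated by `data: [DONE]`); the exact chunk shape is defined in models/chat_completion_chunk.yml, so the sample payloads here are illustrative:

```python
import json

def parse_sse_chunks(stream_text: str) -> list:
    """Parse text/event-stream lines into chat-completion chunk objects.

    Assumes the common convention of `data: <json>` lines terminated by
    `data: [DONE]`; non-data lines (blank separators, comments) are skipped.
    """
    chunks = []
    for line in stream_text.splitlines():
        line = line.strip()
        if not line.startswith("data:"):
            continue
        payload = line[len("data:"):].strip()
        if payload == "[DONE]":
            break
        chunks.append(json.loads(payload))
    return chunks

# Illustrative two-chunk stream; real chunks follow chat_completion_chunk.yml.
sample = (
    'data: {"choices": [{"delta": {"content": "Lis"}}]}\n\n'
    'data: {"choices": [{"delta": {"content": "bon"}}]}\n\n'
    'data: [DONE]\n'
)
pieces = parse_sse_chunks(sample)
```

Concatenating each chunk's delta content reassembles the full completion text.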
@@ -0,0 +1,43 @@

```yaml
operationId: inference_create_image
summary: Generate images from text prompts
description: Creates a high-quality image from a text prompt using GPT-IMAGE-1, the latest image generation model with automatic prompt optimization and enhanced visual capabilities.
tags:
  - Serverless Inference
servers:
  - url: "https://inference.do-ai.run"
    description: production
requestBody:
  required: true
  content:
    application/json:
      schema:
        $ref: "models/create_image_request.yml"
responses:
  "200":
    description: Successful image generation. When stream is true, the response is sent as Server-Sent Events (text/event-stream); otherwise a single JSON object (application/json) is returned.
    headers:
      ratelimit-limit:
        $ref: '../../shared/headers.yml#/ratelimit-limit'
      ratelimit-remaining:
        $ref: '../../shared/headers.yml#/ratelimit-remaining'
      ratelimit-reset:
        $ref: '../../shared/headers.yml#/ratelimit-reset'
    content:
      application/json:
        schema:
          $ref: "models/images_response.yml"
      text/event-stream:
        schema:
          $ref: "models/image_gen_partial_image_event.yml"
  "401":
    $ref: '../../shared/responses/unauthorized.yml'
  "429":
    $ref: '../../shared/responses/too_many_requests.yml'
  "500":
    $ref: '../../shared/responses/server_error.yml'
  default:
    $ref: '../../shared/responses/unexpected_error.yml'
x-codeSamples:
  - $ref: 'examples/curl/inference_create_image.yml'
security:
  - bearer_auth: []
```
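A small sketch of building the request body for this operation, mirroring only the fields shown in the image-generation cURL sample (`prompt`, `model`, `size`, `quality`); the full field list lives in models/create_image_request.yml, and the helper name is hypothetical:

```python
import json

def build_image_request(prompt: str, model: str = "openai-gpt-image-1",
                        size: str = "auto", quality: str = "auto") -> str:
    """Serialize a /v1/images/generations body.

    Fields and defaults mirror the cURL sample in this PR; consult
    models/create_image_request.yml for the authoritative schema.
    """
    return json.dumps({
        "prompt": prompt,
        "model": model,
        "size": size,
        "quality": quality,
    })

body = build_image_request("A cute baby sea otter floating on its back in calm blue water")
```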