diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 42a31c2a..9fb3fd00 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,7 +19,7 @@ jobs: runs-on: ${{ github.repository == 'stainless-sdks/gradient-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }} if: github.event_name == 'push' || github.event.pull_request.head.repo.fork steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Install Rye run: | @@ -44,7 +44,7 @@ jobs: id-token: write runs-on: ${{ github.repository == 'stainless-sdks/gradient-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Install Rye run: | @@ -81,7 +81,7 @@ jobs: runs-on: ${{ github.repository == 'stainless-sdks/gradient-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }} if: github.event_name == 'push' || github.event.pull_request.head.repo.fork steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Install Rye run: | diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml index cb2d6509..2b8edb2f 100644 --- a/.github/workflows/publish-pypi.yml +++ b/.github/workflows/publish-pypi.yml @@ -14,7 +14,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Install Rye run: | diff --git a/.github/workflows/release-doctor.yml b/.github/workflows/release-doctor.yml index 9c8912bc..bde3b3ed 100644 --- a/.github/workflows/release-doctor.yml +++ b/.github/workflows/release-doctor.yml @@ -12,7 +12,7 @@ jobs: if: github.repository == 'digitalocean/gradient-python' && (github.event_name == 'push' || github.event_name == 'workflow_dispatch' || startsWith(github.head_ref, 'release-please') || github.head_ref == 'next') steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Check release environment run: | diff --git a/.release-please-manifest.json b/.release-please-manifest.json index cc4da81f..940f2ca3 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "3.10.1" + ".": "3.11.0" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index d321100e..0d591538 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ -configured_endpoints: 189 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/digitalocean%2Fgradient-0778b2e9d56c826f92ee69ef081d8d73fd94c139b85e11becaa88bf1cbe95fb9.yml -openapi_spec_hash: 49daca0dd735cad7200ca1c741a5dd43 -config_hash: fad48c8ac796b240fe3b90181586d1a4 +configured_endpoints: 193 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/digitalocean%2Fgradient-2344b44246a44d39ad5b74d3077bd2958745aad67feb15970756532fa0b3f9d6.yml +openapi_spec_hash: a1913979235ce152a8dc380fabe5362e +config_hash: 6c9a04f3cc5dd88e1e4f0ae42d98ba9a diff --git a/CHANGELOG.md b/CHANGELOG.md index a82ede75..e9caf7e3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,23 @@ # Changelog +## 3.11.0 (2026-02-17) + +Full Changelog: [v3.10.1...v3.11.0](https://github.com/digitalocean/gradient-python/compare/v3.10.1...v3.11.0) + +### Features + +* **api:** api update ([f67228f](https://github.com/digitalocean/gradient-python/commit/f67228f0bd8de96342d15b1b7c3096e4b03c7aaa)) + + +### Bug Fixes + +* **client:** loosen auth header validation ([bf5fa9f](https://github.com/digitalocean/gradient-python/commit/bf5fa9f93b6443ac0b41d5fa134aab8ee12cfbc3)) + + +### Chores + +* **internal:** codegen related update 
([07eeda8](https://github.com/digitalocean/gradient-python/commit/07eeda8ae3e1c7bdfe3fcc51a8786f35682f4c3f)) + ## 3.10.1 (2025-12-19) Full Changelog: [v3.10.0...v3.10.1](https://github.com/digitalocean/gradient-python/compare/v3.10.0...v3.10.1) diff --git a/api.md b/api.md index caf241a4..45e4eaeb 100644 --- a/api.md +++ b/api.md @@ -10,6 +10,8 @@ from gradient.types import ( ChatCompletionChunk, ChatCompletionTokenLogprob, CompletionUsage, + CreateResponseResponse, + CreateResponseStreamResponse, DiskInfo, Droplet, DropletNextBackupWindow, @@ -404,6 +406,12 @@ Methods: - client.images.generate(\*\*params) -> ImageGenerateResponse +# Responses + +Methods: + +- client.responses.create(\*\*params) -> CreateResponseResponse + # GPUDroplets Types: @@ -856,6 +864,7 @@ from gradient.types.knowledge_bases import ( APIWebCrawlerDataSource, AwsDataSource, DataSourceCreateResponse, + DataSourceUpdateResponse, DataSourceListResponse, DataSourceDeleteResponse, DataSourceCreatePresignedURLsResponse, @@ -865,6 +874,7 @@ from gradient.types.knowledge_bases import ( Methods: - client.knowledge_bases.data_sources.create(path_knowledge_base_uuid, \*\*params) -> DataSourceCreateResponse +- client.knowledge_bases.data_sources.update(path_data_source_uuid, \*, path_knowledge_base_uuid, \*\*params) -> DataSourceUpdateResponse - client.knowledge_bases.data_sources.list(knowledge_base_uuid, \*\*params) -> DataSourceListResponse - client.knowledge_bases.data_sources.delete(data_source_uuid, \*, knowledge_base_uuid) -> DataSourceDeleteResponse - client.knowledge_bases.data_sources.create_presigned_urls(\*\*params) -> DataSourceCreatePresignedURLsResponse @@ -1039,3 +1049,29 @@ from gradient.types import RetrieveDocumentsResponse Methods: - client.retrieve.documents(knowledge_base_id, \*\*params) -> RetrieveDocumentsResponse + +# Apps + +## JobInvocations + +Types: + +```python +from gradient.types.apps import JobInvocationCancelResponse +``` + +Methods: + +- client.apps.job_invocations.cancel(job_invocation_id, \*, app_id, \*\*params) -> JobInvocationCancelResponse + +# Billing + +Types: + +```python +from gradient.types import BillingListInsightsResponse +``` + +Methods: + +- client.billing.list_insights(end_date, \*, account_urn, start_date, \*\*params) -> BillingListInsightsResponse diff --git a/pyproject.toml b/pyproject.toml index 5d4e9ebe..3295b4c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "gradient" -version = "3.10.1" +version = "3.11.0" description = "The official Python library for the Gradient API" dynamic = ["readme"] license = "Apache-2.0" diff --git a/requirements-dev.lock b/requirements-dev.lock index 63b7bd64..667e0dff 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -12,14 +12,14 @@ -e file:. 
aiohappyeyeballs==2.6.1 # via aiohttp -aiohttp==3.13.2 +aiohttp==3.13.3 # via gradient # via httpx-aiohttp aiosignal==1.4.0 # via aiohttp annotated-types==0.7.0 # via pydantic -anyio==4.12.0 +anyio==4.12.1 # via gradient # via httpx argcomplete==3.6.3 @@ -31,7 +31,7 @@ attrs==25.4.0 # via nox backports-asyncio-runner==1.2.0 # via pytest-asyncio -certifi==2025.11.12 +certifi==2026.1.4 # via httpcore # via httpx colorlog==6.10.1 @@ -61,7 +61,7 @@ httpx==0.28.1 # via gradient # via httpx-aiohttp # via respx -httpx-aiohttp==0.1.9 +httpx-aiohttp==0.1.12 # via gradient humanize==4.13.0 # via nox @@ -69,7 +69,7 @@ idna==3.11 # via anyio # via httpx # via yarl -importlib-metadata==8.7.0 +importlib-metadata==8.7.1 iniconfig==2.1.0 # via pytest markdown-it-py==3.0.0 @@ -82,14 +82,14 @@ multidict==6.7.0 mypy==1.17.0 mypy-extensions==1.1.0 # via mypy -nodeenv==1.9.1 +nodeenv==1.10.0 # via pyright nox==2025.11.12 packaging==25.0 # via dependency-groups # via nox # via pytest -pathspec==0.12.1 +pathspec==1.0.3 # via mypy platformdirs==4.4.0 # via virtualenv @@ -115,13 +115,13 @@ python-dateutil==2.9.0.post0 # via time-machine respx==0.22.0 rich==14.2.0 -ruff==0.14.7 +ruff==0.14.13 six==1.17.0 # via python-dateutil sniffio==1.3.1 # via gradient time-machine==2.19.0 -tomli==2.3.0 +tomli==2.4.0 # via dependency-groups # via mypy # via nox @@ -141,7 +141,7 @@ typing-extensions==4.15.0 # via virtualenv typing-inspection==0.4.2 # via pydantic -virtualenv==20.35.4 +virtualenv==20.36.1 # via nox yarl==1.22.0 # via aiohttp diff --git a/requirements.lock b/requirements.lock index b2623a7b..b48c65ea 100644 --- a/requirements.lock +++ b/requirements.lock @@ -12,21 +12,21 @@ -e file:. aiohappyeyeballs==2.6.1 # via aiohttp -aiohttp==3.13.2 +aiohttp==3.13.3 # via gradient # via httpx-aiohttp aiosignal==1.4.0 # via aiohttp annotated-types==0.7.0 # via pydantic -anyio==4.12.0 +anyio==4.12.1 # via gradient # via httpx async-timeout==5.0.1 # via aiohttp attrs==25.4.0 # via aiohttp -certifi==2025.11.12 +certifi==2026.1.4 # via httpcore # via httpx distro==1.9.0 @@ -43,7 +43,7 @@ httpcore==1.0.9 httpx==0.28.1 # via gradient # via httpx-aiohttp -httpx-aiohttp==0.1.9 +httpx-aiohttp==0.1.12 # via gradient idna==3.11 # via anyio diff --git a/src/gradient/_base_client.py b/src/gradient/_base_client.py index f038b215..ca3db359 100644 --- a/src/gradient/_base_client.py +++ b/src/gradient/_base_client.py @@ -9,6 +9,7 @@ import inspect import logging import platform +import warnings import email.utils from types import TracebackType from random import random @@ -51,15 +52,17 @@ ResponseT, AnyMapping, PostParser, + BinaryTypes, RequestFiles, HttpxSendArgs, RequestOptions, + AsyncBinaryTypes, HttpxRequestFiles, ModelBuilderProtocol, not_given, ) from ._utils import is_dict, is_list, asyncify, is_given, lru_cache, is_mapping -from ._compat import PYDANTIC_V1, model_copy, model_dump +from ._compat import PYDANTIC_V1, model_copy from ._models import GenericModel, FinalRequestOptions, validate_type, construct_type from ._response import ( APIResponse, @@ -83,6 +86,7 @@ APIConnectionError, APIResponseValidationError, ) +from ._utils._json import openapi_dumps log: logging.Logger = logging.getLogger(__name__) @@ -481,8 +485,19 @@ def _build_request( retries_taken: int = 0, ) -> httpx.Request: if log.isEnabledFor(logging.DEBUG): - log.debug("Request options: %s", model_dump(options, exclude_unset=True)) - + log.debug( + "Request options", + # model_dump( + # options, + # exclude_unset=True, + # # Pydantic v1 can't dump every type we support 
in content, so we exclude it for now. + # exclude={ + # "content", + # } + # if PYDANTIC_V1 + # else {}, + # ), + ) kwargs: dict[str, Any] = {} json_data = options.json_data @@ -536,10 +551,18 @@ def _build_request( is_body_allowed = options.method.lower() != "get" if is_body_allowed: - if isinstance(json_data, bytes): + if options.content is not None and json_data is not None: + raise TypeError("Passing both `content` and `json_data` is not supported") + if options.content is not None and files is not None: + raise TypeError("Passing both `content` and `files` is not supported") + if options.content is not None: + kwargs["content"] = options.content + elif isinstance(json_data, bytes): kwargs["content"] = json_data - else: - kwargs["json"] = json_data if is_given(json_data) else None + elif not files: + # Don't set content when JSON is sent as multipart/form-data, + # since httpx's content param overrides other body arguments + kwargs["content"] = openapi_dumps(json_data) if is_given(json_data) and json_data is not None else None kwargs["files"] = files else: headers.pop("Content-Type", None) @@ -1210,6 +1233,7 @@ def post( *, cast_to: Type[ResponseT], body: Body | None = None, + content: BinaryTypes | None = None, options: RequestOptions = {}, files: RequestFiles | None = None, stream: Literal[False] = False, @@ -1222,6 +1246,7 @@ def post( *, cast_to: Type[ResponseT], body: Body | None = None, + content: BinaryTypes | None = None, options: RequestOptions = {}, files: RequestFiles | None = None, stream: Literal[True], @@ -1235,6 +1260,7 @@ def post( *, cast_to: Type[ResponseT], body: Body | None = None, + content: BinaryTypes | None = None, options: RequestOptions = {}, files: RequestFiles | None = None, stream: bool, @@ -1247,17 +1273,25 @@ def post( *, cast_to: Type[ResponseT], body: Body | None = None, + content: BinaryTypes | None = None, options: RequestOptions = {}, files: RequestFiles | None = None, stream: bool = False, stream_cls: type[_StreamT] | None = None, ) -> ResponseT | _StreamT: + if body is not None and content is not None: + raise TypeError("Passing both `body` and `content` is not supported") + if files is not None and content is not None: + raise TypeError("Passing both `files` and `content` is not supported") + if isinstance(body, bytes): + warnings.warn( + "Passing raw bytes as `body` is deprecated and will be removed in a future version. " + "Please pass raw bytes via the `content` parameter instead.", + DeprecationWarning, + stacklevel=2, + ) opts = FinalRequestOptions.construct( - method="post", - url=path, - json_data=body, - files=to_httpx_files(files), - **options, + method="post", url=path, json_data=body, content=content, files=to_httpx_files(files), **options ) return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls)) @@ -1267,11 +1301,23 @@ def patch( *, cast_to: Type[ResponseT], body: Body | None = None, + content: BinaryTypes | None = None, files: RequestFiles | None = None, options: RequestOptions = {}, ) -> ResponseT: + if body is not None and content is not None: + raise TypeError("Passing both `body` and `content` is not supported") + if files is not None and content is not None: + raise TypeError("Passing both `files` and `content` is not supported") + if isinstance(body, bytes): + warnings.warn( + "Passing raw bytes as `body` is deprecated and will be removed in a future version. 
" + "Please pass raw bytes via the `content` parameter instead.", + DeprecationWarning, + stacklevel=2, + ) opts = FinalRequestOptions.construct( - method="patch", url=path, json_data=body, files=to_httpx_files(files), **options + method="patch", url=path, json_data=body, content=content, files=to_httpx_files(files), **options ) return self.request(cast_to, opts) @@ -1281,15 +1327,23 @@ def put( *, cast_to: Type[ResponseT], body: Body | None = None, + content: BinaryTypes | None = None, files: RequestFiles | None = None, options: RequestOptions = {}, ) -> ResponseT: + if body is not None and content is not None: + raise TypeError("Passing both `body` and `content` is not supported") + if files is not None and content is not None: + raise TypeError("Passing both `files` and `content` is not supported") + if isinstance(body, bytes): + warnings.warn( + "Passing raw bytes as `body` is deprecated and will be removed in a future version. " + "Please pass raw bytes via the `content` parameter instead.", + DeprecationWarning, + stacklevel=2, + ) opts = FinalRequestOptions.construct( - method="put", - url=path, - json_data=body, - files=to_httpx_files(files), - **options, + method="put", url=path, json_data=body, content=content, files=to_httpx_files(files), **options ) return self.request(cast_to, opts) @@ -1299,9 +1353,19 @@ def delete( *, cast_to: Type[ResponseT], body: Body | None = None, + content: BinaryTypes | None = None, options: RequestOptions = {}, ) -> ResponseT: - opts = FinalRequestOptions.construct(method="delete", url=path, json_data=body, **options) + if body is not None and content is not None: + raise TypeError("Passing both `body` and `content` is not supported") + if isinstance(body, bytes): + warnings.warn( + "Passing raw bytes as `body` is deprecated and will be removed in a future version. " + "Please pass raw bytes via the `content` parameter instead.", + DeprecationWarning, + stacklevel=2, + ) + opts = FinalRequestOptions.construct(method="delete", url=path, json_data=body, content=content, **options) return self.request(cast_to, opts) def get_api_list( @@ -1751,6 +1815,7 @@ async def post( *, cast_to: Type[ResponseT], body: Body | None = None, + content: AsyncBinaryTypes | None = None, files: RequestFiles | None = None, options: RequestOptions = {}, stream: Literal[False] = False, @@ -1763,6 +1828,7 @@ async def post( *, cast_to: Type[ResponseT], body: Body | None = None, + content: AsyncBinaryTypes | None = None, files: RequestFiles | None = None, options: RequestOptions = {}, stream: Literal[True], @@ -1776,6 +1842,7 @@ async def post( *, cast_to: Type[ResponseT], body: Body | None = None, + content: AsyncBinaryTypes | None = None, files: RequestFiles | None = None, options: RequestOptions = {}, stream: bool, @@ -1788,17 +1855,25 @@ async def post( *, cast_to: Type[ResponseT], body: Body | None = None, + content: AsyncBinaryTypes | None = None, files: RequestFiles | None = None, options: RequestOptions = {}, stream: bool = False, stream_cls: type[_AsyncStreamT] | None = None, ) -> ResponseT | _AsyncStreamT: + if body is not None and content is not None: + raise TypeError("Passing both `body` and `content` is not supported") + if files is not None and content is not None: + raise TypeError("Passing both `files` and `content` is not supported") + if isinstance(body, bytes): + warnings.warn( + "Passing raw bytes as `body` is deprecated and will be removed in a future version. 
" + "Please pass raw bytes via the `content` parameter instead.", + DeprecationWarning, + stacklevel=2, + ) opts = FinalRequestOptions.construct( - method="post", - url=path, - json_data=body, - files=await async_to_httpx_files(files), - **options, + method="post", url=path, json_data=body, content=content, files=await async_to_httpx_files(files), **options ) return await self.request(cast_to, opts, stream=stream, stream_cls=stream_cls) @@ -1808,11 +1883,28 @@ async def patch( *, cast_to: Type[ResponseT], body: Body | None = None, + content: AsyncBinaryTypes | None = None, files: RequestFiles | None = None, options: RequestOptions = {}, ) -> ResponseT: + if body is not None and content is not None: + raise TypeError("Passing both `body` and `content` is not supported") + if files is not None and content is not None: + raise TypeError("Passing both `files` and `content` is not supported") + if isinstance(body, bytes): + warnings.warn( + "Passing raw bytes as `body` is deprecated and will be removed in a future version. " + "Please pass raw bytes via the `content` parameter instead.", + DeprecationWarning, + stacklevel=2, + ) opts = FinalRequestOptions.construct( - method="patch", url=path, json_data=body, files=await async_to_httpx_files(files), **options + method="patch", + url=path, + json_data=body, + content=content, + files=await async_to_httpx_files(files), + **options, ) return await self.request(cast_to, opts) @@ -1822,15 +1914,23 @@ async def put( *, cast_to: Type[ResponseT], body: Body | None = None, + content: AsyncBinaryTypes | None = None, files: RequestFiles | None = None, options: RequestOptions = {}, ) -> ResponseT: + if body is not None and content is not None: + raise TypeError("Passing both `body` and `content` is not supported") + if files is not None and content is not None: + raise TypeError("Passing both `files` and `content` is not supported") + if isinstance(body, bytes): + warnings.warn( + "Passing raw bytes as `body` is deprecated and will be removed in a future version. " + "Please pass raw bytes via the `content` parameter instead.", + DeprecationWarning, + stacklevel=2, + ) opts = FinalRequestOptions.construct( - method="put", - url=path, - json_data=body, - files=await async_to_httpx_files(files), - **options, + method="put", url=path, json_data=body, content=content, files=await async_to_httpx_files(files), **options ) return await self.request(cast_to, opts) @@ -1840,9 +1940,19 @@ async def delete( *, cast_to: Type[ResponseT], body: Body | None = None, + content: AsyncBinaryTypes | None = None, options: RequestOptions = {}, ) -> ResponseT: - opts = FinalRequestOptions.construct(method="delete", url=path, json_data=body, **options) + if body is not None and content is not None: + raise TypeError("Passing both `body` and `content` is not supported") + if isinstance(body, bytes): + warnings.warn( + "Passing raw bytes as `body` is deprecated and will be removed in a future version. 
" + "Please pass raw bytes via the `content` parameter instead.", + DeprecationWarning, + stacklevel=2, + ) + opts = FinalRequestOptions.construct(method="delete", url=path, json_data=body, content=content, **options) return await self.request(cast_to, opts) def get_api_list( diff --git a/src/gradient/_client.py b/src/gradient/_client.py index 847121b1..42867c7d 100644 --- a/src/gradient/_client.py +++ b/src/gradient/_client.py @@ -34,26 +34,29 @@ if TYPE_CHECKING: from .resources import ( nfs, + apps, chat, agents, images, models, + billing, regions, retrieve, databases, inference, + responses, gpu_droplets, knowledge_bases, ) from .resources.images import ImagesResource, AsyncImagesResource + from .resources.billing import BillingResource, AsyncBillingResource from .resources.nfs.nfs import NfsResource, AsyncNfsResource from .resources.regions import RegionsResource, AsyncRegionsResource from .resources.retrieve import RetrieveResource, AsyncRetrieveResource + from .resources.apps.apps import AppsResource, AsyncAppsResource from .resources.chat.chat import ChatResource, AsyncChatResource - from .resources.gpu_droplets import ( - GPUDropletsResource, - AsyncGPUDropletsResource, - ) + from .resources.responses import ResponsesResource, AsyncResponsesResource + from .resources.gpu_droplets import GPUDropletsResource, AsyncGPUDropletsResource from .resources.agents.agents import AgentsResource, AsyncAgentsResource from .resources.models.models import ModelsResource, AsyncModelsResource from .resources.databases.databases import DatabasesResource, AsyncDatabasesResource @@ -201,6 +204,12 @@ def images(self) -> ImagesResource: return ImagesResource(self) + @cached_property + def responses(self) -> ResponsesResource: + from .resources.responses import ResponsesResource + + return ResponsesResource(self) + @cached_property def gpu_droplets(self) -> GPUDropletsResource: from .resources.gpu_droplets import GPUDropletsResource @@ -249,6 +258,18 @@ def retrieve(self) -> RetrieveResource: return RetrieveResource(self) + @cached_property + def apps(self) -> AppsResource: + from .resources.apps import AppsResource + + return AppsResource(self) + + @cached_property + def billing(self) -> BillingResource: + from .resources.billing import BillingResource + + return BillingResource(self) + @cached_property def with_raw_response(self) -> GradientWithRawResponse: return GradientWithRawResponse(self) @@ -353,14 +374,10 @@ def copy( Create a new client instance re-using the same options given to the current client with optional overriding. 
""" if default_headers is not None and set_default_headers is not None: - raise ValueError( - "The `default_headers` and `set_default_headers` arguments are mutually exclusive" - ) + raise ValueError("The `default_headers` and `set_default_headers` arguments are mutually exclusive") if default_query is not None and set_default_query is not None: - raise ValueError( - "The `default_query` and `set_default_query` arguments are mutually exclusive" - ) + raise ValueError("The `default_query` and `set_default_query` arguments are mutually exclusive") headers = self._custom_headers if default_headers is not None: @@ -411,14 +428,10 @@ def _make_status_error( return _exceptions.BadRequestError(err_msg, response=response, body=body) if response.status_code == 401: - return _exceptions.AuthenticationError( - err_msg, response=response, body=body - ) + return _exceptions.AuthenticationError(err_msg, response=response, body=body) if response.status_code == 403: - return _exceptions.PermissionDeniedError( - err_msg, response=response, body=body - ) + return _exceptions.PermissionDeniedError(err_msg, response=response, body=body) if response.status_code == 404: return _exceptions.NotFoundError(err_msg, response=response, body=body) @@ -427,17 +440,13 @@ def _make_status_error( return _exceptions.ConflictError(err_msg, response=response, body=body) if response.status_code == 422: - return _exceptions.UnprocessableEntityError( - err_msg, response=response, body=body - ) + return _exceptions.UnprocessableEntityError(err_msg, response=response, body=body) if response.status_code == 429: return _exceptions.RateLimitError(err_msg, response=response, body=body) if response.status_code >= 500: - return _exceptions.InternalServerError( - err_msg, response=response, body=body - ) + return _exceptions.InternalServerError(err_msg, response=response, body=body) return APIStatusError(err_msg, response=response, body=body) @@ -567,6 +576,12 @@ def images(self) -> AsyncImagesResource: return AsyncImagesResource(self) + @cached_property + def responses(self) -> AsyncResponsesResource: + from .resources.responses import AsyncResponsesResource + + return AsyncResponsesResource(self) + @cached_property def gpu_droplets(self) -> AsyncGPUDropletsResource: from .resources.gpu_droplets import AsyncGPUDropletsResource @@ -615,6 +630,18 @@ def retrieve(self) -> AsyncRetrieveResource: return AsyncRetrieveResource(self) + @cached_property + def apps(self) -> AsyncAppsResource: + from .resources.apps import AsyncAppsResource + + return AsyncAppsResource(self) + + @cached_property + def billing(self) -> AsyncBillingResource: + from .resources.billing import AsyncBillingResource + + return AsyncBillingResource(self) + @cached_property def with_raw_response(self) -> AsyncGradientWithRawResponse: return AsyncGradientWithRawResponse(self) @@ -719,14 +746,10 @@ def copy( Create a new client instance re-using the same options given to the current client with optional overriding. 
""" if default_headers is not None and set_default_headers is not None: - raise ValueError( - "The `default_headers` and `set_default_headers` arguments are mutually exclusive" - ) + raise ValueError("The `default_headers` and `set_default_headers` arguments are mutually exclusive") if default_query is not None and set_default_query is not None: - raise ValueError( - "The `default_query` and `set_default_query` arguments are mutually exclusive" - ) + raise ValueError("The `default_query` and `set_default_query` arguments are mutually exclusive") headers = self._custom_headers if default_headers is not None: @@ -777,14 +800,10 @@ def _make_status_error( return _exceptions.BadRequestError(err_msg, response=response, body=body) if response.status_code == 401: - return _exceptions.AuthenticationError( - err_msg, response=response, body=body - ) + return _exceptions.AuthenticationError(err_msg, response=response, body=body) if response.status_code == 403: - return _exceptions.PermissionDeniedError( - err_msg, response=response, body=body - ) + return _exceptions.PermissionDeniedError(err_msg, response=response, body=body) if response.status_code == 404: return _exceptions.NotFoundError(err_msg, response=response, body=body) @@ -793,17 +812,13 @@ def _make_status_error( return _exceptions.ConflictError(err_msg, response=response, body=body) if response.status_code == 422: - return _exceptions.UnprocessableEntityError( - err_msg, response=response, body=body - ) + return _exceptions.UnprocessableEntityError(err_msg, response=response, body=body) if response.status_code == 429: return _exceptions.RateLimitError(err_msg, response=response, body=body) if response.status_code >= 500: - return _exceptions.InternalServerError( - err_msg, response=response, body=body - ) + return _exceptions.InternalServerError(err_msg, response=response, body=body) return APIStatusError(err_msg, response=response, body=body) @@ -831,6 +846,12 @@ def images(self) -> images.ImagesResourceWithRawResponse: return ImagesResourceWithRawResponse(self._client.images) + @cached_property + def responses(self) -> responses.ResponsesResourceWithRawResponse: + from .resources.responses import ResponsesResourceWithRawResponse + + return ResponsesResourceWithRawResponse(self._client.responses) + @cached_property def gpu_droplets(self) -> gpu_droplets.GPUDropletsResourceWithRawResponse: from .resources.gpu_droplets import GPUDropletsResourceWithRawResponse @@ -879,6 +900,18 @@ def retrieve(self) -> retrieve.RetrieveResourceWithRawResponse: return RetrieveResourceWithRawResponse(self._client.retrieve) + @cached_property + def apps(self) -> apps.AppsResourceWithRawResponse: + from .resources.apps import AppsResourceWithRawResponse + + return AppsResourceWithRawResponse(self._client.apps) + + @cached_property + def billing(self) -> billing.BillingResourceWithRawResponse: + from .resources.billing import BillingResourceWithRawResponse + + return BillingResourceWithRawResponse(self._client.billing) + class AsyncGradientWithRawResponse: _client: AsyncGradient @@ -904,6 +937,12 @@ def images(self) -> images.AsyncImagesResourceWithRawResponse: return AsyncImagesResourceWithRawResponse(self._client.images) + @cached_property + def responses(self) -> responses.AsyncResponsesResourceWithRawResponse: + from .resources.responses import AsyncResponsesResourceWithRawResponse + + return AsyncResponsesResourceWithRawResponse(self._client.responses) + @cached_property def gpu_droplets(self) -> gpu_droplets.AsyncGPUDropletsResourceWithRawResponse: from 
.resources.gpu_droplets import AsyncGPUDropletsResourceWithRawResponse @@ -956,6 +995,18 @@ def retrieve(self) -> retrieve.AsyncRetrieveResourceWithRawResponse: return AsyncRetrieveResourceWithRawResponse(self._client.retrieve) + @cached_property + def apps(self) -> apps.AsyncAppsResourceWithRawResponse: + from .resources.apps import AsyncAppsResourceWithRawResponse + + return AsyncAppsResourceWithRawResponse(self._client.apps) + + @cached_property + def billing(self) -> billing.AsyncBillingResourceWithRawResponse: + from .resources.billing import AsyncBillingResourceWithRawResponse + + return AsyncBillingResourceWithRawResponse(self._client.billing) + class GradientWithStreamedResponse: _client: Gradient @@ -981,6 +1032,12 @@ def images(self) -> images.ImagesResourceWithStreamingResponse: return ImagesResourceWithStreamingResponse(self._client.images) + @cached_property + def responses(self) -> responses.ResponsesResourceWithStreamingResponse: + from .resources.responses import ResponsesResourceWithStreamingResponse + + return ResponsesResourceWithStreamingResponse(self._client.responses) + @cached_property def gpu_droplets(self) -> gpu_droplets.GPUDropletsResourceWithStreamingResponse: from .resources.gpu_droplets import GPUDropletsResourceWithStreamingResponse @@ -1033,6 +1090,18 @@ def retrieve(self) -> retrieve.RetrieveResourceWithStreamingResponse: return RetrieveResourceWithStreamingResponse(self._client.retrieve) + @cached_property + def apps(self) -> apps.AppsResourceWithStreamingResponse: + from .resources.apps import AppsResourceWithStreamingResponse + + return AppsResourceWithStreamingResponse(self._client.apps) + + @cached_property + def billing(self) -> billing.BillingResourceWithStreamingResponse: + from .resources.billing import BillingResourceWithStreamingResponse + + return BillingResourceWithStreamingResponse(self._client.billing) + class AsyncGradientWithStreamedResponse: _client: AsyncGradient @@ -1058,6 +1127,12 @@ def images(self) -> images.AsyncImagesResourceWithStreamingResponse: return AsyncImagesResourceWithStreamingResponse(self._client.images) + @cached_property + def responses(self) -> responses.AsyncResponsesResourceWithStreamingResponse: + from .resources.responses import AsyncResponsesResourceWithStreamingResponse + + return AsyncResponsesResourceWithStreamingResponse(self._client.responses) + @cached_property def gpu_droplets( self, @@ -1082,9 +1157,7 @@ def knowledge_bases( AsyncKnowledgeBasesResourceWithStreamingResponse, ) - return AsyncKnowledgeBasesResourceWithStreamingResponse( - self._client.knowledge_bases - ) + return AsyncKnowledgeBasesResourceWithStreamingResponse(self._client.knowledge_bases) @cached_property def models(self) -> models.AsyncModelsResourceWithStreamingResponse: @@ -1116,6 +1189,18 @@ def retrieve(self) -> retrieve.AsyncRetrieveResourceWithStreamingResponse: return AsyncRetrieveResourceWithStreamingResponse(self._client.retrieve) + @cached_property + def apps(self) -> apps.AsyncAppsResourceWithStreamingResponse: + from .resources.apps import AsyncAppsResourceWithStreamingResponse + + return AsyncAppsResourceWithStreamingResponse(self._client.apps) + + @cached_property + def billing(self) -> billing.AsyncBillingResourceWithStreamingResponse: + from .resources.billing import AsyncBillingResourceWithStreamingResponse + + return AsyncBillingResourceWithStreamingResponse(self._client.billing) + Client = Gradient diff --git a/src/gradient/_compat.py b/src/gradient/_compat.py index bdef67f0..786ff42a 100644 --- a/src/gradient/_compat.py 
+++ b/src/gradient/_compat.py @@ -139,6 +139,7 @@ def model_dump( exclude_defaults: bool = False, warnings: bool = True, mode: Literal["json", "python"] = "python", + by_alias: bool | None = None, ) -> dict[str, Any]: if (not PYDANTIC_V1) or hasattr(model, "model_dump"): return model.model_dump( @@ -148,13 +149,12 @@ def model_dump( exclude_defaults=exclude_defaults, # warnings are not supported in Pydantic v1 warnings=True if PYDANTIC_V1 else warnings, + by_alias=by_alias, ) return cast( "dict[str, Any]", model.dict( # pyright: ignore[reportDeprecated, reportUnnecessaryCast] - exclude=exclude, - exclude_unset=exclude_unset, - exclude_defaults=exclude_defaults, + exclude=exclude, exclude_unset=exclude_unset, exclude_defaults=exclude_defaults, by_alias=bool(by_alias) ), ) diff --git a/src/gradient/_models.py b/src/gradient/_models.py index ca9500b2..29070e05 100644 --- a/src/gradient/_models.py +++ b/src/gradient/_models.py @@ -3,7 +3,20 @@ import os import inspect import weakref -from typing import TYPE_CHECKING, Any, Type, Union, Generic, TypeVar, Callable, Optional, cast +from typing import ( + IO, + TYPE_CHECKING, + Any, + Type, + Union, + Generic, + TypeVar, + Callable, + Iterable, + Optional, + AsyncIterable, + cast, +) from datetime import date, datetime from typing_extensions import ( List, @@ -787,6 +800,7 @@ class FinalRequestOptionsInput(TypedDict, total=False): timeout: float | Timeout | None files: HttpxRequestFiles | None idempotency_key: str + content: Union[bytes, bytearray, IO[bytes], Iterable[bytes], AsyncIterable[bytes], None] json_data: Body extra_json: AnyMapping follow_redirects: bool @@ -805,6 +819,7 @@ class FinalRequestOptions(pydantic.BaseModel): post_parser: Union[Callable[[Any], Any], NotGiven] = NotGiven() follow_redirects: Union[bool, None] = None + content: Union[bytes, bytearray, IO[bytes], Iterable[bytes], AsyncIterable[bytes], None] = None # It should be noted that we cannot use `json` here as that would override # a BaseModel method in an incompatible fashion. json_data: Union[Body, None] = None diff --git a/src/gradient/_types.py b/src/gradient/_types.py index 65831fee..338a463d 100644 --- a/src/gradient/_types.py +++ b/src/gradient/_types.py @@ -13,9 +13,11 @@ Mapping, TypeVar, Callable, + Iterable, Iterator, Optional, Sequence, + AsyncIterable, ) from typing_extensions import ( Set, @@ -56,6 +58,13 @@ else: Base64FileInput = Union[IO[bytes], PathLike] FileContent = Union[IO[bytes], bytes, PathLike] # PathLike is not subscriptable in Python 3.8. + + +# Used for sending raw binary data / streaming data in request bodies +# e.g. 
for file uploads without multipart encoding +BinaryTypes = Union[bytes, bytearray, IO[bytes], Iterable[bytes]] +AsyncBinaryTypes = Union[bytes, bytearray, IO[bytes], AsyncIterable[bytes]] + FileTypes = Union[ # file (or bytes) FileContent, diff --git a/src/gradient/_utils/_compat.py b/src/gradient/_utils/_compat.py index dd703233..2c70b299 100644 --- a/src/gradient/_utils/_compat.py +++ b/src/gradient/_utils/_compat.py @@ -26,7 +26,7 @@ def is_union(tp: Optional[Type[Any]]) -> bool: else: import types - return tp is Union or tp is types.UnionType + return tp is Union or tp is types.UnionType # type: ignore[comparison-overlap] def is_typeddict(tp: Type[Any]) -> bool: diff --git a/src/gradient/_utils/_json.py b/src/gradient/_utils/_json.py new file mode 100644 index 00000000..60584214 --- /dev/null +++ b/src/gradient/_utils/_json.py @@ -0,0 +1,35 @@ +import json +from typing import Any +from datetime import datetime +from typing_extensions import override + +import pydantic + +from .._compat import model_dump + + +def openapi_dumps(obj: Any) -> bytes: + """ + Serialize an object to UTF-8 encoded JSON bytes. + + Extends the standard json.dumps with support for additional types + commonly used in the SDK, such as `datetime`, `pydantic.BaseModel`, etc. + """ + return json.dumps( + obj, + cls=_CustomEncoder, + # Uses the same defaults as httpx's JSON serialization + ensure_ascii=False, + separators=(",", ":"), + allow_nan=False, + ).encode() + + +class _CustomEncoder(json.JSONEncoder): + @override + def default(self, o: Any) -> Any: + if isinstance(o, datetime): + return o.isoformat() + if isinstance(o, pydantic.BaseModel): + return model_dump(o, exclude_unset=True, mode="json", by_alias=True) + return super().default(o) diff --git a/src/gradient/_version.py b/src/gradient/_version.py index f1eb0021..07c427f5 100644 --- a/src/gradient/_version.py +++ b/src/gradient/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
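[Reviewer note] The pieces above fit together: `BinaryTypes`/`AsyncBinaryTypes` type the new `content` parameter, `FinalRequestOptions` carries it through, `_build_request` sends it to httpx unmodified, and `openapi_dumps` replaces httpx's built-in JSON encoding for `json_data`. A minimal sketch of both behaviors, under stated assumptions: the endpoint path is hypothetical, and `openapi_dumps` lives in a private module, so the import is illustrative only.

```python
import httpx
from datetime import datetime

from gradient import Gradient
from gradient._utils._json import openapi_dumps  # private module; import path may change

# The serializer mirrors httpx's JSON defaults (compact separators,
# ensure_ascii=False, NaN rejected) and serializes datetimes via .isoformat().
assert openapi_dumps({"created_at": datetime(2026, 2, 17)}) == (
    b'{"created_at":"2026-02-17T00:00:00"}'
)

client = Gradient()  # credentials read from the environment

# Raw bytes now go through the new `content` parameter and are sent as-is;
# passing raw bytes via `body` still works but emits a DeprecationWarning,
# and combining `content` with `body` or `files` raises a TypeError up front.
client.post("/v2/example/upload", cast_to=httpx.Response, content=b"\x00\x01")
```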
__title__ = "gradient" -__version__ = "3.10.1" # x-release-please-version +__version__ = "3.11.0" # x-release-please-version diff --git a/src/gradient/resources/__init__.py b/src/gradient/resources/__init__.py index f668bb06..6f218273 100644 --- a/src/gradient/resources/__init__.py +++ b/src/gradient/resources/__init__.py @@ -8,6 +8,14 @@ NfsResourceWithStreamingResponse, AsyncNfsResourceWithStreamingResponse, ) +from .apps import ( + AppsResource, + AsyncAppsResource, + AppsResourceWithRawResponse, + AsyncAppsResourceWithRawResponse, + AppsResourceWithStreamingResponse, + AsyncAppsResourceWithStreamingResponse, +) from .chat import ( ChatResource, AsyncChatResource, @@ -40,6 +48,14 @@ ModelsResourceWithStreamingResponse, AsyncModelsResourceWithStreamingResponse, ) +from .billing import ( + BillingResource, + AsyncBillingResource, + BillingResourceWithRawResponse, + AsyncBillingResourceWithRawResponse, + BillingResourceWithStreamingResponse, + AsyncBillingResourceWithStreamingResponse, +) from .regions import ( RegionsResource, AsyncRegionsResource, @@ -72,6 +88,14 @@ InferenceResourceWithStreamingResponse, AsyncInferenceResourceWithStreamingResponse, ) +from .responses import ( + ResponsesResource, + AsyncResponsesResource, + ResponsesResourceWithRawResponse, + AsyncResponsesResourceWithRawResponse, + ResponsesResourceWithStreamingResponse, + AsyncResponsesResourceWithStreamingResponse, +) from .gpu_droplets import ( GPUDropletsResource, AsyncGPUDropletsResource, @@ -108,6 +132,12 @@ "AsyncImagesResourceWithRawResponse", "ImagesResourceWithStreamingResponse", "AsyncImagesResourceWithStreamingResponse", + "ResponsesResource", + "AsyncResponsesResource", + "ResponsesResourceWithRawResponse", + "AsyncResponsesResourceWithRawResponse", + "ResponsesResourceWithStreamingResponse", + "AsyncResponsesResourceWithStreamingResponse", "GPUDropletsResource", "AsyncGPUDropletsResource", "GPUDropletsResourceWithRawResponse", @@ -156,4 +186,16 @@ "AsyncRetrieveResourceWithRawResponse", "RetrieveResourceWithStreamingResponse", "AsyncRetrieveResourceWithStreamingResponse", + "AppsResource", + "AsyncAppsResource", + "AppsResourceWithRawResponse", + "AsyncAppsResourceWithRawResponse", + "AppsResourceWithStreamingResponse", + "AsyncAppsResourceWithStreamingResponse", + "BillingResource", + "AsyncBillingResource", + "BillingResourceWithRawResponse", + "AsyncBillingResourceWithRawResponse", + "BillingResourceWithStreamingResponse", + "AsyncBillingResourceWithStreamingResponse", ] diff --git a/src/gradient/resources/agents/evaluation_datasets.py b/src/gradient/resources/agents/evaluation_datasets.py index 0f9631ba..db9b473d 100644 --- a/src/gradient/resources/agents/evaluation_datasets.py +++ b/src/gradient/resources/agents/evaluation_datasets.py @@ -3,6 +3,7 @@ from __future__ import annotations from typing import Iterable +from typing_extensions import Literal import httpx @@ -53,6 +54,10 @@ def with_streaming_response(self) -> EvaluationDatasetsResourceWithStreamingResp def create( self, *, + dataset_type: Literal[ + "EVALUATION_DATASET_TYPE_UNKNOWN", "EVALUATION_DATASET_TYPE_ADK", "EVALUATION_DATASET_TYPE_NON_ADK" + ] + | Omit = omit, file_upload_dataset: APIFileUploadDataSourceParam | Omit = omit, name: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
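[Reviewer note] The new `dataset_type` field on evaluation dataset creation accepts one of the three literals shown in the signature above. A short sketch of the sync call; the dataset name is hypothetical:

```python
from gradient import Gradient

client = Gradient()

dataset = client.agents.evaluation_datasets.create(
    # New in 3.11.0: tag the dataset as ADK, non-ADK, or unknown.
    dataset_type="EVALUATION_DATASET_TYPE_ADK",
    name="support-bot-regressions",  # hypothetical dataset name
)
```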
@@ -85,6 +90,7 @@ def create( else "https://api.digitalocean.com/v2/gen-ai/evaluation_datasets", body=maybe_transform( { + "dataset_type": dataset_type, "file_upload_dataset": file_upload_dataset, "name": name, }, @@ -160,6 +166,10 @@ def with_streaming_response(self) -> AsyncEvaluationDatasetsResourceWithStreamin async def create( self, *, + dataset_type: Literal[ + "EVALUATION_DATASET_TYPE_UNKNOWN", "EVALUATION_DATASET_TYPE_ADK", "EVALUATION_DATASET_TYPE_NON_ADK" + ] + | Omit = omit, file_upload_dataset: APIFileUploadDataSourceParam | Omit = omit, name: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -192,6 +202,7 @@ async def create( else "https://api.digitalocean.com/v2/gen-ai/evaluation_datasets", body=await async_maybe_transform( { + "dataset_type": dataset_type, "file_upload_dataset": file_upload_dataset, "name": name, }, diff --git a/src/gradient/resources/agents/evaluation_runs.py b/src/gradient/resources/agents/evaluation_runs.py index 8506b00f..2b5745af 100644 --- a/src/gradient/resources/agents/evaluation_runs.py +++ b/src/gradient/resources/agents/evaluation_runs.py @@ -47,6 +47,7 @@ def with_streaming_response(self) -> EvaluationRunsResourceWithStreamingResponse def create( self, *, + agent_deployment_names: SequenceNotStr[str] | Omit = omit, agent_uuids: SequenceNotStr[str] | Omit = omit, run_name: str | Omit = omit, test_case_uuid: str | Omit = omit, @@ -62,7 +63,9 @@ def create( `/v2/gen-ai/evaluation_runs`. Args: - agent_uuids: Agent UUIDs to run the test case against. + agent_deployment_names: Agent deployment names to run the test case against (ADK agent workspaces). + + agent_uuids: Agent UUIDs to run the test case against (legacy agents). run_name: The name of the run. @@ -82,6 +85,7 @@ def create( else "https://api.digitalocean.com/v2/gen-ai/evaluation_runs", body=maybe_transform( { + "agent_deployment_names": agent_deployment_names, "agent_uuids": agent_uuids, "run_name": run_name, "test_case_uuid": test_case_uuid, @@ -249,6 +253,7 @@ def with_streaming_response(self) -> AsyncEvaluationRunsResourceWithStreamingRes async def create( self, *, + agent_deployment_names: SequenceNotStr[str] | Omit = omit, agent_uuids: SequenceNotStr[str] | Omit = omit, run_name: str | Omit = omit, test_case_uuid: str | Omit = omit, @@ -264,7 +269,9 @@ async def create( `/v2/gen-ai/evaluation_runs`. Args: - agent_uuids: Agent UUIDs to run the test case against. + agent_deployment_names: Agent deployment names to run the test case against (ADK agent workspaces). + + agent_uuids: Agent UUIDs to run the test case against (legacy agents). run_name: The name of the run. 
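[Reviewer note] As the updated docstrings spell out, `agent_deployment_names` targets ADK agent workspaces while `agent_uuids` remains for legacy agents. A sketch of the new parameter in use; all identifiers are hypothetical:

```python
from gradient import Gradient

client = Gradient()

run = client.agents.evaluation_runs.create(
    # New in 3.11.0: run the test case against ADK agent workspaces by name.
    agent_deployment_names=["adk-workspace-prod"],
    run_name="nightly-regression",
    test_case_uuid="00000000-0000-0000-0000-000000000000",  # hypothetical UUID
)
```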
@@ -284,6 +291,7 @@ async def create( else "https://api.digitalocean.com/v2/gen-ai/evaluation_runs", body=await async_maybe_transform( { + "agent_deployment_names": agent_deployment_names, "agent_uuids": agent_uuids, "run_name": run_name, "test_case_uuid": test_case_uuid, diff --git a/src/gradient/resources/agents/evaluation_test_cases.py b/src/gradient/resources/agents/evaluation_test_cases.py index d53b8c26..0e8cce03 100644 --- a/src/gradient/resources/agents/evaluation_test_cases.py +++ b/src/gradient/resources/agents/evaluation_test_cases.py @@ -56,6 +56,7 @@ def with_streaming_response(self) -> EvaluationTestCasesResourceWithStreamingRes def create( self, *, + agent_workspace_name: str | Omit = omit, dataset_uuid: str | Omit = omit, description: str | Omit = omit, metrics: SequenceNotStr[str] | Omit = omit, @@ -98,6 +99,7 @@ def create( else "https://api.digitalocean.com/v2/gen-ai/evaluation_test_cases", body=maybe_transform( { + "agent_workspace_name": agent_workspace_name, "dataset_uuid": dataset_uuid, "description": description, "metrics": metrics, @@ -318,6 +320,7 @@ def with_streaming_response(self) -> AsyncEvaluationTestCasesResourceWithStreami async def create( self, *, + agent_workspace_name: str | Omit = omit, dataset_uuid: str | Omit = omit, description: str | Omit = omit, metrics: SequenceNotStr[str] | Omit = omit, @@ -360,6 +363,7 @@ async def create( else "https://api.digitalocean.com/v2/gen-ai/evaluation_test_cases", body=await async_maybe_transform( { + "agent_workspace_name": agent_workspace_name, "dataset_uuid": dataset_uuid, "description": description, "metrics": metrics, diff --git a/src/gradient/resources/apps/__init__.py b/src/gradient/resources/apps/__init__.py new file mode 100644 index 00000000..3033a599 --- /dev/null +++ b/src/gradient/resources/apps/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .apps import ( + AppsResource, + AsyncAppsResource, + AppsResourceWithRawResponse, + AsyncAppsResourceWithRawResponse, + AppsResourceWithStreamingResponse, + AsyncAppsResourceWithStreamingResponse, +) +from .job_invocations import ( + JobInvocationsResource, + AsyncJobInvocationsResource, + JobInvocationsResourceWithRawResponse, + AsyncJobInvocationsResourceWithRawResponse, + JobInvocationsResourceWithStreamingResponse, + AsyncJobInvocationsResourceWithStreamingResponse, +) + +__all__ = [ + "JobInvocationsResource", + "AsyncJobInvocationsResource", + "JobInvocationsResourceWithRawResponse", + "AsyncJobInvocationsResourceWithRawResponse", + "JobInvocationsResourceWithStreamingResponse", + "AsyncJobInvocationsResourceWithStreamingResponse", + "AppsResource", + "AsyncAppsResource", + "AppsResourceWithRawResponse", + "AsyncAppsResourceWithRawResponse", + "AppsResourceWithStreamingResponse", + "AsyncAppsResourceWithStreamingResponse", +] diff --git a/src/gradient/resources/apps/apps.py b/src/gradient/resources/apps/apps.py new file mode 100644 index 00000000..889f2406 --- /dev/null +++ b/src/gradient/resources/apps/apps.py @@ -0,0 +1,102 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from .job_invocations import ( + JobInvocationsResource, + AsyncJobInvocationsResource, + JobInvocationsResourceWithRawResponse, + AsyncJobInvocationsResourceWithRawResponse, + JobInvocationsResourceWithStreamingResponse, + AsyncJobInvocationsResourceWithStreamingResponse, +) + +__all__ = ["AppsResource", "AsyncAppsResource"] + + +class AppsResource(SyncAPIResource): + @cached_property + def job_invocations(self) -> JobInvocationsResource: + return JobInvocationsResource(self._client) + + @cached_property + def with_raw_response(self) -> AppsResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/digitalocean/gradient-python#accessing-raw-response-data-eg-headers + """ + return AppsResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AppsResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/digitalocean/gradient-python#with_streaming_response + """ + return AppsResourceWithStreamingResponse(self) + + +class AsyncAppsResource(AsyncAPIResource): + @cached_property + def job_invocations(self) -> AsyncJobInvocationsResource: + return AsyncJobInvocationsResource(self._client) + + @cached_property + def with_raw_response(self) -> AsyncAppsResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/digitalocean/gradient-python#accessing-raw-response-data-eg-headers + """ + return AsyncAppsResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncAppsResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/digitalocean/gradient-python#with_streaming_response + """ + return AsyncAppsResourceWithStreamingResponse(self) + + +class AppsResourceWithRawResponse: + def __init__(self, apps: AppsResource) -> None: + self._apps = apps + + @cached_property + def job_invocations(self) -> JobInvocationsResourceWithRawResponse: + return JobInvocationsResourceWithRawResponse(self._apps.job_invocations) + + +class AsyncAppsResourceWithRawResponse: + def __init__(self, apps: AsyncAppsResource) -> None: + self._apps = apps + + @cached_property + def job_invocations(self) -> AsyncJobInvocationsResourceWithRawResponse: + return AsyncJobInvocationsResourceWithRawResponse(self._apps.job_invocations) + + +class AppsResourceWithStreamingResponse: + def __init__(self, apps: AppsResource) -> None: + self._apps = apps + + @cached_property + def job_invocations(self) -> JobInvocationsResourceWithStreamingResponse: + return JobInvocationsResourceWithStreamingResponse(self._apps.job_invocations) + + +class AsyncAppsResourceWithStreamingResponse: + def __init__(self, apps: AsyncAppsResource) -> None: + self._apps = apps + + @cached_property + def job_invocations(self) -> AsyncJobInvocationsResourceWithStreamingResponse: + return AsyncJobInvocationsResourceWithStreamingResponse(self._apps.job_invocations) diff --git a/src/gradient/resources/apps/job_invocations.py b/src/gradient/resources/apps/job_invocations.py new file mode 100644 index 00000000..449dd829 --- /dev/null +++ b/src/gradient/resources/apps/job_invocations.py @@ -0,0 +1,191 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import httpx + +from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given +from ..._utils import maybe_transform, async_maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from ...types.apps import job_invocation_cancel_params +from ..._base_client import make_request_options +from ...types.apps.job_invocation_cancel_response import JobInvocationCancelResponse + +__all__ = ["JobInvocationsResource", "AsyncJobInvocationsResource"] + + +class JobInvocationsResource(SyncAPIResource): + @cached_property + def with_raw_response(self) -> JobInvocationsResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/digitalocean/gradient-python#accessing-raw-response-data-eg-headers + """ + return JobInvocationsResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> JobInvocationsResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/digitalocean/gradient-python#with_streaming_response + """ + return JobInvocationsResourceWithStreamingResponse(self) + + def cancel( + self, + job_invocation_id: str, + *, + app_id: str, + job_name: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> JobInvocationCancelResponse: + """ + Cancel a specific job invocation for an app. + + Args: + job_name: The job name to list job invocations for. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not app_id: + raise ValueError(f"Expected a non-empty value for `app_id` but received {app_id!r}") + if not job_invocation_id: + raise ValueError(f"Expected a non-empty value for `job_invocation_id` but received {job_invocation_id!r}") + return self._post( + f"/v2/apps/{app_id}/job-invocations/{job_invocation_id}/cancel" + if self._client._base_url_overridden + else f"https://api.digitalocean.com/v2/apps/{app_id}/job-invocations/{job_invocation_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform({"job_name": job_name}, job_invocation_cancel_params.JobInvocationCancelParams), + ), + cast_to=JobInvocationCancelResponse, + ) + + +class AsyncJobInvocationsResource(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncJobInvocationsResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/digitalocean/gradient-python#accessing-raw-response-data-eg-headers + """ + return AsyncJobInvocationsResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncJobInvocationsResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/digitalocean/gradient-python#with_streaming_response + """ + return AsyncJobInvocationsResourceWithStreamingResponse(self) + + async def cancel( + self, + job_invocation_id: str, + *, + app_id: str, + job_name: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> JobInvocationCancelResponse: + """ + Cancel a specific job invocation for an app. + + Args: + job_name: The job name to list job invocations for. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not app_id: + raise ValueError(f"Expected a non-empty value for `app_id` but received {app_id!r}") + if not job_invocation_id: + raise ValueError(f"Expected a non-empty value for `job_invocation_id` but received {job_invocation_id!r}") + return await self._post( + f"/v2/apps/{app_id}/job-invocations/{job_invocation_id}/cancel" + if self._client._base_url_overridden + else f"https://api.digitalocean.com/v2/apps/{app_id}/job-invocations/{job_invocation_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform( + {"job_name": job_name}, job_invocation_cancel_params.JobInvocationCancelParams + ), + ), + cast_to=JobInvocationCancelResponse, + ) + + +class JobInvocationsResourceWithRawResponse: + def __init__(self, job_invocations: JobInvocationsResource) -> None: + self._job_invocations = job_invocations + + self.cancel = to_raw_response_wrapper( + job_invocations.cancel, + ) + + +class AsyncJobInvocationsResourceWithRawResponse: + def __init__(self, job_invocations: AsyncJobInvocationsResource) -> None: + self._job_invocations = job_invocations + + self.cancel = async_to_raw_response_wrapper( + job_invocations.cancel, + ) + + +class JobInvocationsResourceWithStreamingResponse: + def __init__(self, job_invocations: JobInvocationsResource) -> None: + self._job_invocations = job_invocations + + self.cancel = to_streamed_response_wrapper( + job_invocations.cancel, + ) + + +class AsyncJobInvocationsResourceWithStreamingResponse: + def __init__(self, job_invocations: AsyncJobInvocationsResource) -> None: + self._job_invocations = job_invocations + + self.cancel = async_to_streamed_response_wrapper( + job_invocations.cancel, + ) diff --git a/src/gradient/resources/billing.py b/src/gradient/resources/billing.py new file mode 100644 index 00000000..f9f42aad --- /dev/null +++ b/src/gradient/resources/billing.py @@ -0,0 +1,226 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from datetime import date + +import httpx + +from ..types import billing_list_insights_params +from .._types import Body, Omit, Query, Headers, NotGiven, omit, not_given +from .._utils import maybe_transform, async_maybe_transform +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from .._base_client import make_request_options +from ..types.billing_list_insights_response import BillingListInsightsResponse + +__all__ = ["BillingResource", "AsyncBillingResource"] + + +class BillingResource(SyncAPIResource): + @cached_property + def with_raw_response(self) -> BillingResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
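[Reviewer note] The cancel endpoint defined above takes the job invocation ID positionally, requires `app_id` to build the URL, and optionally filters by `job_name`. A minimal sketch with hypothetical identifiers:

```python
from gradient import Gradient

client = Gradient()

result = client.apps.job_invocations.cancel(
    "11111111-2222-3333-4444-555555555555",       # hypothetical job invocation ID
    app_id="aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee",  # hypothetical app ID
    job_name="nightly-report",  # optional, per the docstring above
)
```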
+ + For more information, see https://www.github.com/digitalocean/gradient-python#accessing-raw-response-data-eg-headers + """ + return BillingResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> BillingResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/digitalocean/gradient-python#with_streaming_response + """ + return BillingResourceWithStreamingResponse(self) + + def list_insights( + self, + end_date: Union[str, date], + *, + account_urn: str, + start_date: Union[str, date], + page: int | Omit = omit, + per_page: int | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> BillingListInsightsResponse: + """ + This endpoint returns day-over-day changes in billing resource usage based on + nightly invoice items, including total amount, region, SKU, and description for + a specified date range. It is important to note that the daily resource usage + may not reflect month-end billing totals when totaled for a given month as + nightly invoice item estimates do not necessarily encompass all invoicing + factors for the entire month. + + Args: + page: Which 'page' of paginated results to return. + + per_page: Number of items returned per page + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not account_urn: + raise ValueError(f"Expected a non-empty value for `account_urn` but received {account_urn!r}") + if not start_date: + raise ValueError(f"Expected a non-empty value for `start_date` but received {start_date!r}") + if not end_date: + raise ValueError(f"Expected a non-empty value for `end_date` but received {end_date!r}") + return self._get( + f"/v2/billing/{account_urn}/insights/{start_date}/{end_date}" + if self._client._base_url_overridden + else f"https://api.digitalocean.com/v2/billing/{account_urn}/insights/{start_date}/{end_date}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "page": page, + "per_page": per_page, + }, + billing_list_insights_params.BillingListInsightsParams, + ), + ), + cast_to=BillingListInsightsResponse, + ) + + +class AsyncBillingResource(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncBillingResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/digitalocean/gradient-python#accessing-raw-response-data-eg-headers + """ + return AsyncBillingResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncBillingResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/digitalocean/gradient-python#with_streaming_response + """ + return AsyncBillingResourceWithStreamingResponse(self) + + async def list_insights( + self, + end_date: Union[str, date], + *, + account_urn: str, + start_date: Union[str, date], + page: int | Omit = omit, + per_page: int | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> BillingListInsightsResponse: + """ + This endpoint returns day-over-day changes in billing resource usage based on + nightly invoice items, including total amount, region, SKU, and description for + a specified date range. It is important to note that the daily resource usage + may not reflect month-end billing totals when totaled for a given month as + nightly invoice item estimates do not necessarily encompass all invoicing + factors for the entire month. + + Args: + page: Which 'page' of paginated results to return. + + per_page: Number of items returned per page + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not account_urn: + raise ValueError(f"Expected a non-empty value for `account_urn` but received {account_urn!r}") + if not start_date: + raise ValueError(f"Expected a non-empty value for `start_date` but received {start_date!r}") + if not end_date: + raise ValueError(f"Expected a non-empty value for `end_date` but received {end_date!r}") + return await self._get( + f"/v2/billing/{account_urn}/insights/{start_date}/{end_date}" + if self._client._base_url_overridden + else f"https://api.digitalocean.com/v2/billing/{account_urn}/insights/{start_date}/{end_date}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform( + { + "page": page, + "per_page": per_page, + }, + billing_list_insights_params.BillingListInsightsParams, + ), + ), + cast_to=BillingListInsightsResponse, + ) + + +class BillingResourceWithRawResponse: + def __init__(self, billing: BillingResource) -> None: + self._billing = billing + + self.list_insights = to_raw_response_wrapper( + billing.list_insights, + ) + + +class AsyncBillingResourceWithRawResponse: + def __init__(self, billing: AsyncBillingResource) -> None: + self._billing = billing + + self.list_insights = async_to_raw_response_wrapper( + billing.list_insights, + ) + + +class BillingResourceWithStreamingResponse: + def __init__(self, billing: BillingResource) -> None: + self._billing = billing + + self.list_insights = to_streamed_response_wrapper( + billing.list_insights, + ) + + +class AsyncBillingResourceWithStreamingResponse: + def __init__(self, billing: AsyncBillingResource) -> None: + self._billing = billing + + self.list_insights = async_to_streamed_response_wrapper( + billing.list_insights, + ) diff --git a/src/gradient/resources/knowledge_bases/data_sources.py b/src/gradient/resources/knowledge_bases/data_sources.py index a00d93f5..6c339108 100644 --- 
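A corresponding sketch for the new `billing.list_insights` method added above. The account URN format and the dates are placeholders; per the signature, `end_date` is positional while `account_urn` and `start_date` are keyword-only, and dates may be `datetime.date` objects or strings:

```python
from datetime import date

from gradient import Gradient

client = Gradient()  # assumes API credentials are supplied via the environment

# Day-over-day resource usage between two dates, paginated via page/per_page.
insights = client.billing.list_insights(
    date(2026, 1, 31),                 # end_date (positional)
    account_urn="do:account:example",  # placeholder URN
    start_date=date(2026, 1, 1),
    per_page=50,
)
print(insights)
```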
a/src/gradient/resources/knowledge_bases/data_sources.py +++ b/src/gradient/resources/knowledge_bases/data_sources.py @@ -3,6 +3,7 @@ from __future__ import annotations from typing import Iterable +from typing_extensions import Literal import httpx @@ -20,12 +21,14 @@ from ...types.knowledge_bases import ( data_source_list_params, data_source_create_params, + data_source_update_params, data_source_create_presigned_urls_params, ) from ...types.knowledge_bases.aws_data_source_param import AwsDataSourceParam from ...types.knowledge_bases.data_source_list_response import DataSourceListResponse from ...types.knowledge_bases.data_source_create_response import DataSourceCreateResponse from ...types.knowledge_bases.data_source_delete_response import DataSourceDeleteResponse +from ...types.knowledge_bases.data_source_update_response import DataSourceUpdateResponse from ...types.knowledge_bases.api_spaces_data_source_param import APISpacesDataSourceParam from ...types.knowledge_bases.api_web_crawler_data_source_param import APIWebCrawlerDataSourceParam from ...types.knowledge_bases.data_source_create_presigned_urls_response import DataSourceCreatePresignedURLsResponse @@ -58,6 +61,15 @@ def create( path_knowledge_base_uuid: str, *, aws_data_source: AwsDataSourceParam | Omit = omit, + chunking_algorithm: Literal[ + "CHUNKING_ALGORITHM_UNKNOWN", + "CHUNKING_ALGORITHM_SECTION_BASED", + "CHUNKING_ALGORITHM_HIERARCHICAL", + "CHUNKING_ALGORITHM_SEMANTIC", + "CHUNKING_ALGORITHM_FIXED_LENGTH", + ] + | Omit = omit, + chunking_options: data_source_create_params.ChunkingOptions | Omit = omit, body_knowledge_base_uuid: str | Omit = omit, spaces_data_source: APISpacesDataSourceParam | Omit = omit, web_crawler_data_source: APIWebCrawlerDataSourceParam | Omit = omit, @@ -75,6 +87,16 @@ def create( Args: aws_data_source: AWS S3 Data Source + chunking_algorithm: The chunking algorithm to use for processing data sources. + + **Note: This feature requires enabling the knowledgebase enhancements feature + preview flag.** + + chunking_options: Configuration options for the chunking algorithm. + + **Note: This feature requires enabling the knowledgebase enhancements feature + preview flag.** + body_knowledge_base_uuid: Knowledge base id spaces_data_source: Spaces Bucket Data Source @@ -100,6 +122,8 @@ def create( body=maybe_transform( { "aws_data_source": aws_data_source, + "chunking_algorithm": chunking_algorithm, + "chunking_options": chunking_options, "body_knowledge_base_uuid": body_knowledge_base_uuid, "spaces_data_source": spaces_data_source, "web_crawler_data_source": web_crawler_data_source, @@ -112,6 +136,84 @@ def create( cast_to=DataSourceCreateResponse, ) + def update( + self, + path_data_source_uuid: str, + *, + path_knowledge_base_uuid: str, + chunking_algorithm: Literal[ + "CHUNKING_ALGORITHM_UNKNOWN", + "CHUNKING_ALGORITHM_SECTION_BASED", + "CHUNKING_ALGORITHM_HIERARCHICAL", + "CHUNKING_ALGORITHM_SEMANTIC", + "CHUNKING_ALGORITHM_FIXED_LENGTH", + ] + | Omit = omit, + chunking_options: data_source_update_params.ChunkingOptions | Omit = omit, + body_data_source_uuid: str | Omit = omit, + body_knowledge_base_uuid: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> DataSourceUpdateResponse: + """To update a data source (e.g. + + chunking options), send a PUT request to + `/v2/gen-ai/knowledge_bases/{knowledge_base_uuid}/data_sources/{data_source_uuid}`. + + Args: + chunking_algorithm: The chunking algorithm to use for processing data sources. + + **Note: This feature requires enabling the knowledgebase enhancements feature + preview flag.** + + chunking_options: Configuration options for the chunking algorithm. + + **Note: This feature requires enabling the knowledgebase enhancements feature + preview flag.** + + body_data_source_uuid: Data Source ID (Path Parameter) + + body_knowledge_base_uuid: Knowledge Base ID (Path Parameter) + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not path_knowledge_base_uuid: + raise ValueError( + f"Expected a non-empty value for `path_knowledge_base_uuid` but received {path_knowledge_base_uuid!r}" + ) + if not path_data_source_uuid: + raise ValueError( + f"Expected a non-empty value for `path_data_source_uuid` but received {path_data_source_uuid!r}" + ) + return self._put( + f"/v2/gen-ai/knowledge_bases/{path_knowledge_base_uuid}/data_sources/{path_data_source_uuid}" + if self._client._base_url_overridden + else f"https://api.digitalocean.com/v2/gen-ai/knowledge_bases/{path_knowledge_base_uuid}/data_sources/{path_data_source_uuid}", + body=maybe_transform( + { + "chunking_algorithm": chunking_algorithm, + "chunking_options": chunking_options, + "body_data_source_uuid": body_data_source_uuid, + "body_knowledge_base_uuid": body_knowledge_base_uuid, + }, + data_source_update_params.DataSourceUpdateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=DataSourceUpdateResponse, + ) + def list( self, knowledge_base_uuid: str, @@ -272,6 +374,15 @@ async def create( path_knowledge_base_uuid: str, *, aws_data_source: AwsDataSourceParam | Omit = omit, + chunking_algorithm: Literal[ + "CHUNKING_ALGORITHM_UNKNOWN", + "CHUNKING_ALGORITHM_SECTION_BASED", + "CHUNKING_ALGORITHM_HIERARCHICAL", + "CHUNKING_ALGORITHM_SEMANTIC", + "CHUNKING_ALGORITHM_FIXED_LENGTH", + ] + | Omit = omit, + chunking_options: data_source_create_params.ChunkingOptions | Omit = omit, body_knowledge_base_uuid: str | Omit = omit, spaces_data_source: APISpacesDataSourceParam | Omit = omit, web_crawler_data_source: APIWebCrawlerDataSourceParam | Omit = omit, @@ -289,6 +400,16 @@ async def create( Args: aws_data_source: AWS S3 Data Source + chunking_algorithm: The chunking algorithm to use for processing data sources. + + **Note: This feature requires enabling the knowledgebase enhancements feature + preview flag.** + + chunking_options: Configuration options for the chunking algorithm. 
+ + **Note: This feature requires enabling the knowledgebase enhancements feature + preview flag.** + body_knowledge_base_uuid: Knowledge base id spaces_data_source: Spaces Bucket Data Source @@ -314,6 +435,8 @@ async def create( body=await async_maybe_transform( { "aws_data_source": aws_data_source, + "chunking_algorithm": chunking_algorithm, + "chunking_options": chunking_options, "body_knowledge_base_uuid": body_knowledge_base_uuid, "spaces_data_source": spaces_data_source, "web_crawler_data_source": web_crawler_data_source, @@ -326,6 +449,84 @@ async def create( cast_to=DataSourceCreateResponse, ) + async def update( + self, + path_data_source_uuid: str, + *, + path_knowledge_base_uuid: str, + chunking_algorithm: Literal[ + "CHUNKING_ALGORITHM_UNKNOWN", + "CHUNKING_ALGORITHM_SECTION_BASED", + "CHUNKING_ALGORITHM_HIERARCHICAL", + "CHUNKING_ALGORITHM_SEMANTIC", + "CHUNKING_ALGORITHM_FIXED_LENGTH", + ] + | Omit = omit, + chunking_options: data_source_update_params.ChunkingOptions | Omit = omit, + body_data_source_uuid: str | Omit = omit, + body_knowledge_base_uuid: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> DataSourceUpdateResponse: + """To update a data source (e.g. + + chunking options), send a PUT request to + `/v2/gen-ai/knowledge_bases/{knowledge_base_uuid}/data_sources/{data_source_uuid}`. + + Args: + chunking_algorithm: The chunking algorithm to use for processing data sources. + + **Note: This feature requires enabling the knowledgebase enhancements feature + preview flag.** + + chunking_options: Configuration options for the chunking algorithm. 
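A sketch of the new `knowledge_bases.data_sources.update` call using one of the chunking algorithms added in this diff. The shape of `chunking_options` is not shown here, so it is omitted; note the docstrings' caveat that chunking controls require the knowledgebase enhancements feature preview flag. The UUIDs are placeholders:

```python
from gradient import Gradient

client = Gradient()  # assumes API credentials are supplied via the environment

# Switch an existing data source to semantic chunking. The enum value is one
# of the Literal members introduced by this diff.
updated = client.knowledge_bases.data_sources.update(
    "data-source-uuid",                  # path_data_source_uuid (placeholder)
    path_knowledge_base_uuid="kb-uuid",  # placeholder
    chunking_algorithm="CHUNKING_ALGORITHM_SEMANTIC",
)
print(updated)
```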
+ + **Note: This feature requires enabling the knowledgebase enhancements feature + preview flag.** + + body_data_source_uuid: Data Source ID (Path Parameter) + + body_knowledge_base_uuid: Knowledge Base ID (Path Parameter) + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not path_knowledge_base_uuid: + raise ValueError( + f"Expected a non-empty value for `path_knowledge_base_uuid` but received {path_knowledge_base_uuid!r}" + ) + if not path_data_source_uuid: + raise ValueError( + f"Expected a non-empty value for `path_data_source_uuid` but received {path_data_source_uuid!r}" + ) + return await self._put( + f"/v2/gen-ai/knowledge_bases/{path_knowledge_base_uuid}/data_sources/{path_data_source_uuid}" + if self._client._base_url_overridden + else f"https://api.digitalocean.com/v2/gen-ai/knowledge_bases/{path_knowledge_base_uuid}/data_sources/{path_data_source_uuid}", + body=await async_maybe_transform( + { + "chunking_algorithm": chunking_algorithm, + "chunking_options": chunking_options, + "body_data_source_uuid": body_data_source_uuid, + "body_knowledge_base_uuid": body_knowledge_base_uuid, + }, + data_source_update_params.DataSourceUpdateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=DataSourceUpdateResponse, + ) + async def list( self, knowledge_base_uuid: str, @@ -468,6 +669,9 @@ def __init__(self, data_sources: DataSourcesResource) -> None: self.create = to_raw_response_wrapper( data_sources.create, ) + self.update = to_raw_response_wrapper( + data_sources.update, + ) self.list = to_raw_response_wrapper( data_sources.list, ) @@ -486,6 +690,9 @@ def __init__(self, data_sources: AsyncDataSourcesResource) -> None: self.create = async_to_raw_response_wrapper( data_sources.create, ) + self.update = async_to_raw_response_wrapper( + data_sources.update, + ) self.list = async_to_raw_response_wrapper( data_sources.list, ) @@ -504,6 +711,9 @@ def __init__(self, data_sources: DataSourcesResource) -> None: self.create = to_streamed_response_wrapper( data_sources.create, ) + self.update = to_streamed_response_wrapper( + data_sources.update, + ) self.list = to_streamed_response_wrapper( data_sources.list, ) @@ -522,6 +732,9 @@ def __init__(self, data_sources: AsyncDataSourcesResource) -> None: self.create = async_to_streamed_response_wrapper( data_sources.create, ) + self.update = async_to_streamed_response_wrapper( + data_sources.update, + ) self.list = async_to_streamed_response_wrapper( data_sources.list, ) diff --git a/src/gradient/resources/knowledge_bases/knowledge_bases.py b/src/gradient/resources/knowledge_bases/knowledge_bases.py index 04c19b32..9c22a60c 100644 --- a/src/gradient/resources/knowledge_bases/knowledge_bases.py +++ b/src/gradient/resources/knowledge_bases/knowledge_bases.py @@ -210,9 +210,7 @@ def retrieve( timeout: Override the client-level default timeout for this request, in seconds """ if not uuid: - raise ValueError( - f"Expected a non-empty value for `uuid` but received {uuid!r}" - ) + raise ValueError(f"Expected a non-empty value for `uuid` but received {uuid!r}") return self._get( ( f"/v2/gen-ai/knowledge_bases/{uuid}" @@ -271,9 +269,7 @@ def update( timeout: Override the client-level default timeout for this request, in seconds """ if not path_uuid: - raise 
ValueError( - f"Expected a non-empty value for `path_uuid` but received {path_uuid!r}" - ) + raise ValueError(f"Expected a non-empty value for `path_uuid` but received {path_uuid!r}") return self._put( ( f"/v2/gen-ai/knowledge_bases/{path_uuid}" @@ -375,9 +371,7 @@ def delete( timeout: Override the client-level default timeout for this request, in seconds """ if not uuid: - raise ValueError( - f"Expected a non-empty value for `uuid` but received {uuid!r}" - ) + raise ValueError(f"Expected a non-empty value for `uuid` but received {uuid!r}") return self._delete( ( f"/v2/gen-ai/knowledge_bases/{uuid}" @@ -646,9 +640,7 @@ async def retrieve( timeout: Override the client-level default timeout for this request, in seconds """ if not uuid: - raise ValueError( - f"Expected a non-empty value for `uuid` but received {uuid!r}" - ) + raise ValueError(f"Expected a non-empty value for `uuid` but received {uuid!r}") return await self._get( ( f"/v2/gen-ai/knowledge_bases/{uuid}" @@ -707,9 +699,7 @@ async def update( timeout: Override the client-level default timeout for this request, in seconds """ if not path_uuid: - raise ValueError( - f"Expected a non-empty value for `path_uuid` but received {path_uuid!r}" - ) + raise ValueError(f"Expected a non-empty value for `path_uuid` but received {path_uuid!r}") return await self._put( ( f"/v2/gen-ai/knowledge_bases/{path_uuid}" @@ -811,9 +801,7 @@ async def delete( timeout: Override the client-level default timeout for this request, in seconds """ if not uuid: - raise ValueError( - f"Expected a non-empty value for `uuid` but received {uuid!r}" - ) + raise ValueError(f"Expected a non-empty value for `uuid` but received {uuid!r}") return await self._delete( ( f"/v2/gen-ai/knowledge_bases/{uuid}" @@ -1011,15 +999,11 @@ def __init__(self, knowledge_bases: AsyncKnowledgeBasesResource) -> None: @cached_property def data_sources(self) -> AsyncDataSourcesResourceWithRawResponse: - return AsyncDataSourcesResourceWithRawResponse( - self._knowledge_bases.data_sources - ) + return AsyncDataSourcesResourceWithRawResponse(self._knowledge_bases.data_sources) @cached_property def indexing_jobs(self) -> AsyncIndexingJobsResourceWithRawResponse: - return AsyncIndexingJobsResourceWithRawResponse( - self._knowledge_bases.indexing_jobs - ) + return AsyncIndexingJobsResourceWithRawResponse(self._knowledge_bases.indexing_jobs) class KnowledgeBasesResourceWithStreamingResponse: @@ -1050,15 +1034,11 @@ def __init__(self, knowledge_bases: KnowledgeBasesResource) -> None: @cached_property def data_sources(self) -> DataSourcesResourceWithStreamingResponse: - return DataSourcesResourceWithStreamingResponse( - self._knowledge_bases.data_sources - ) + return DataSourcesResourceWithStreamingResponse(self._knowledge_bases.data_sources) @cached_property def indexing_jobs(self) -> IndexingJobsResourceWithStreamingResponse: - return IndexingJobsResourceWithStreamingResponse( - self._knowledge_bases.indexing_jobs - ) + return IndexingJobsResourceWithStreamingResponse(self._knowledge_bases.indexing_jobs) class AsyncKnowledgeBasesResourceWithStreamingResponse: @@ -1089,12 +1069,8 @@ def __init__(self, knowledge_bases: AsyncKnowledgeBasesResource) -> None: @cached_property def data_sources(self) -> AsyncDataSourcesResourceWithStreamingResponse: - return AsyncDataSourcesResourceWithStreamingResponse( - self._knowledge_bases.data_sources - ) + return AsyncDataSourcesResourceWithStreamingResponse(self._knowledge_bases.data_sources) @cached_property def indexing_jobs(self) -> 
AsyncIndexingJobsResourceWithStreamingResponse: - return AsyncIndexingJobsResourceWithStreamingResponse( - self._knowledge_bases.indexing_jobs - ) + return AsyncIndexingJobsResourceWithStreamingResponse(self._knowledge_bases.indexing_jobs) diff --git a/src/gradient/resources/responses.py b/src/gradient/resources/responses.py new file mode 100644 index 00000000..d0892fa9 --- /dev/null +++ b/src/gradient/resources/responses.py @@ -0,0 +1,860 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, List, Union, Iterable, Optional +from typing_extensions import Literal, overload + +import httpx + +from ..types import response_create_params +from .._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given +from .._utils import required_args, maybe_transform, async_maybe_transform +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from .._streaming import Stream, AsyncStream +from .._base_client import make_request_options +from ..types.shared.create_response_response import CreateResponseResponse +from ..types.shared.create_response_stream_response import CreateResponseStreamResponse + +__all__ = ["ResponsesResource", "AsyncResponsesResource"] + + +class ResponsesResource(SyncAPIResource): + @cached_property + def with_raw_response(self) -> ResponsesResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/digitalocean/gradient-python#accessing-raw-response-data-eg-headers + """ + return ResponsesResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ResponsesResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/digitalocean/gradient-python#with_streaming_response + """ + return ResponsesResourceWithStreamingResponse(self) + + @overload + def create( + self, + *, + input: Union[str, Iterable[response_create_params.InputUnionMember1]], + model: str, + instructions: Optional[str] | Omit = omit, + max_output_tokens: Optional[int] | Omit = omit, + max_tokens: Optional[int] | Omit = omit, + metadata: Optional[Dict[str, str]] | Omit = omit, + modalities: Optional[List[Literal["text"]]] | Omit = omit, + parallel_tool_calls: Optional[bool] | Omit = omit, + stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit, + stream: Optional[Literal[False]] | Omit = omit, + stream_options: Optional[response_create_params.StreamOptions] | Omit = omit, + temperature: Optional[float] | Omit = omit, + tool_choice: response_create_params.ToolChoice | Omit = omit, + tools: Iterable[response_create_params.Tool] | Omit = omit, + top_p: Optional[float] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> CreateResponseResponse: + """Generate text responses from text prompts. + + This endpoint supports both streaming + and non-streaming responses for VLLM models only. + + Args: + input: The input text prompt or conversation history. Can be a string or an array of + message objects for conversation context. + + model: Model ID used to generate the response. Must be a VLLM model. + + instructions: System-level instructions for the model. This sets the behavior and context for + the response generation. + + max_output_tokens: Maximum number of tokens to generate in the response. If not specified, the + model will use a default value. + + max_tokens: The maximum number of tokens that can be generated in the completion. Alias for + max_output_tokens for compatibility. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Specifies the output types the model should generate. For text-to-text, this + should be ["text"]. + + parallel_tool_calls: Whether to enable parallel tool calls. When true, the model can make multiple + tool calls in parallel. + + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using server-sent events. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Uses Responses API format (with `name`, `description`, `parameters` at top + level). + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help DigitalOcean to + monitor and detect abuse. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + *, + input: Union[str, Iterable[response_create_params.InputUnionMember1]], + model: str, + stream: Literal[True], + instructions: Optional[str] | Omit = omit, + max_output_tokens: Optional[int] | Omit = omit, + max_tokens: Optional[int] | Omit = omit, + metadata: Optional[Dict[str, str]] | Omit = omit, + modalities: Optional[List[Literal["text"]]] | Omit = omit, + parallel_tool_calls: Optional[bool] | Omit = omit, + stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit, + stream_options: Optional[response_create_params.StreamOptions] | Omit = omit, + temperature: Optional[float] | Omit = omit, + tool_choice: response_create_params.ToolChoice | Omit = omit, + tools: Iterable[response_create_params.Tool] | Omit = omit, + top_p: Optional[float] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Stream[CreateResponseStreamResponse]: + """Generate text responses from text prompts. + + This endpoint supports both streaming + and non-streaming responses for VLLM models only. + + Args: + input: The input text prompt or conversation history. Can be a string or an array of + message objects for conversation context. + + model: Model ID used to generate the response. Must be a VLLM model. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using server-sent events. + + instructions: System-level instructions for the model. This sets the behavior and context for + the response generation. + + max_output_tokens: Maximum number of tokens to generate in the response. If not specified, the + model will use a default value. + + max_tokens: The maximum number of tokens that can be generated in the completion. Alias for + max_output_tokens for compatibility. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Specifies the output types the model should generate. For text-to-text, this + should be ["text"]. + + parallel_tool_calls: Whether to enable parallel tool calls. When true, the model can make multiple + tool calls in parallel. + + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. 
+ + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Uses Responses API format (with `name`, `description`, `parameters` at top + level). + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help DigitalOcean to + monitor and detect abuse. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + *, + input: Union[str, Iterable[response_create_params.InputUnionMember1]], + model: str, + stream: bool, + instructions: Optional[str] | Omit = omit, + max_output_tokens: Optional[int] | Omit = omit, + max_tokens: Optional[int] | Omit = omit, + metadata: Optional[Dict[str, str]] | Omit = omit, + modalities: Optional[List[Literal["text"]]] | Omit = omit, + parallel_tool_calls: Optional[bool] | Omit = omit, + stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit, + stream_options: Optional[response_create_params.StreamOptions] | Omit = omit, + temperature: Optional[float] | Omit = omit, + tool_choice: response_create_params.ToolChoice | Omit = omit, + tools: Iterable[response_create_params.Tool] | Omit = omit, + top_p: Optional[float] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> CreateResponseResponse | Stream[CreateResponseStreamResponse]: + """Generate text responses from text prompts. + + This endpoint supports both streaming + and non-streaming responses for VLLM models only. + + Args: + input: The input text prompt or conversation history. Can be a string or an array of + message objects for conversation context. + + model: Model ID used to generate the response. Must be a VLLM model. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using server-sent events. + + instructions: System-level instructions for the model. This sets the behavior and context for + the response generation. + + max_output_tokens: Maximum number of tokens to generate in the response. If not specified, the + model will use a default value. 
+ + max_tokens: The maximum number of tokens that can be generated in the completion. Alias for + max_output_tokens for compatibility. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Specifies the output types the model should generate. For text-to-text, this + should be ["text"]. + + parallel_tool_calls: Whether to enable parallel tool calls. When true, the model can make multiple + tool calls in parallel. + + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Uses Responses API format (with `name`, `description`, `parameters` at top + level). + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help DigitalOcean to + monitor and detect abuse. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
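Before the concrete implementation below, a sketch of the two call shapes these overloads describe. The model ID is a placeholder and, per the docstrings, must be a VLLM model; credentials are assumed to come from the environment:

```python
from gradient import Gradient

client = Gradient()  # assumes API credentials are supplied via the environment

# Non-streaming: returns a CreateResponseResponse.
resp = client.responses.create(
    input="Write a haiku about the ocean.",
    model="llama3.3-70b-instruct",  # placeholder model ID
    max_output_tokens=128,
)
print(resp)

# Streaming: stream=True switches the return type to
# Stream[CreateResponseStreamResponse], delivered as server-sent events.
stream = client.responses.create(
    input="Write a haiku about the ocean.",
    model="llama3.3-70b-instruct",  # placeholder model ID
    stream=True,
)
for event in stream:
    print(event)
```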
+ + @required_args(["input", "model"], ["input", "model", "stream"]) + def create( + self, + *, + input: Union[str, Iterable[response_create_params.InputUnionMember1]], + model: str, + instructions: Optional[str] | Omit = omit, + max_output_tokens: Optional[int] | Omit = omit, + max_tokens: Optional[int] | Omit = omit, + metadata: Optional[Dict[str, str]] | Omit = omit, + modalities: Optional[List[Literal["text"]]] | Omit = omit, + parallel_tool_calls: Optional[bool] | Omit = omit, + stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit, + stream: Optional[Literal[False]] | Literal[True] | Omit = omit, + stream_options: Optional[response_create_params.StreamOptions] | Omit = omit, + temperature: Optional[float] | Omit = omit, + tool_choice: response_create_params.ToolChoice | Omit = omit, + tools: Iterable[response_create_params.Tool] | Omit = omit, + top_p: Optional[float] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> CreateResponseResponse | Stream[CreateResponseStreamResponse]: + return self._post( + "/responses" if self._client._base_url_overridden else f"{self._client.inference_endpoint}/v1/responses", + body=maybe_transform( + { + "input": input, + "model": model, + "instructions": instructions, + "max_output_tokens": max_output_tokens, + "max_tokens": max_tokens, + "metadata": metadata, + "modalities": modalities, + "parallel_tool_calls": parallel_tool_calls, + "stop": stop, + "stream": stream, + "stream_options": stream_options, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "top_p": top_p, + "user": user, + }, + response_create_params.ResponseCreateParamsStreaming + if stream + else response_create_params.ResponseCreateParamsNonStreaming, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=CreateResponseResponse, + stream=stream or False, + stream_cls=Stream[CreateResponseStreamResponse], + ) + + +class AsyncResponsesResource(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncResponsesResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/digitalocean/gradient-python#accessing-raw-response-data-eg-headers + """ + return AsyncResponsesResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncResponsesResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/digitalocean/gradient-python#with_streaming_response + """ + return AsyncResponsesResourceWithStreamingResponse(self) + + @overload + async def create( + self, + *, + input: Union[str, Iterable[response_create_params.InputUnionMember1]], + model: str, + instructions: Optional[str] | Omit = omit, + max_output_tokens: Optional[int] | Omit = omit, + max_tokens: Optional[int] | Omit = omit, + metadata: Optional[Dict[str, str]] | Omit = omit, + modalities: Optional[List[Literal["text"]]] | Omit = omit, + parallel_tool_calls: Optional[bool] | Omit = omit, + stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit, + stream: Optional[Literal[False]] | Omit = omit, + stream_options: Optional[response_create_params.StreamOptions] | Omit = omit, + temperature: Optional[float] | Omit = omit, + tool_choice: response_create_params.ToolChoice | Omit = omit, + tools: Iterable[response_create_params.Tool] | Omit = omit, + top_p: Optional[float] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> CreateResponseResponse: + """Generate text responses from text prompts. + + This endpoint supports both streaming + and non-streaming responses for VLLM models only. + + Args: + input: The input text prompt or conversation history. Can be a string or an array of + message objects for conversation context. + + model: Model ID used to generate the response. Must be a VLLM model. + + instructions: System-level instructions for the model. This sets the behavior and context for + the response generation. + + max_output_tokens: Maximum number of tokens to generate in the response. If not specified, the + model will use a default value. + + max_tokens: The maximum number of tokens that can be generated in the completion. Alias for + max_output_tokens for compatibility. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Specifies the output types the model should generate. For text-to-text, this + should be ["text"]. + + parallel_tool_calls: Whether to enable parallel tool calls. When true, the model can make multiple + tool calls in parallel. + + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using server-sent events. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + tool_choice: Controls which (if any) tool is called by the model. 
`none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Uses Responses API format (with `name`, `description`, `parameters` at top + level). + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help DigitalOcean to + monitor and detect abuse. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + *, + input: Union[str, Iterable[response_create_params.InputUnionMember1]], + model: str, + stream: Literal[True], + instructions: Optional[str] | Omit = omit, + max_output_tokens: Optional[int] | Omit = omit, + max_tokens: Optional[int] | Omit = omit, + metadata: Optional[Dict[str, str]] | Omit = omit, + modalities: Optional[List[Literal["text"]]] | Omit = omit, + parallel_tool_calls: Optional[bool] | Omit = omit, + stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit, + stream_options: Optional[response_create_params.StreamOptions] | Omit = omit, + temperature: Optional[float] | Omit = omit, + tool_choice: response_create_params.ToolChoice | Omit = omit, + tools: Iterable[response_create_params.Tool] | Omit = omit, + top_p: Optional[float] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> AsyncStream[CreateResponseStreamResponse]: + """Generate text responses from text prompts. + + This endpoint supports both streaming + and non-streaming responses for VLLM models only. + + Args: + input: The input text prompt or conversation history. Can be a string or an array of + message objects for conversation context. + + model: Model ID used to generate the response. Must be a VLLM model. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using server-sent events. + + instructions: System-level instructions for the model. This sets the behavior and context for + the response generation. + + max_output_tokens: Maximum number of tokens to generate in the response. If not specified, the + model will use a default value. + + max_tokens: The maximum number of tokens that can be generated in the completion. Alias for + max_output_tokens for compatibility. 
+ + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Specifies the output types the model should generate. For text-to-text, this + should be ["text"]. + + parallel_tool_calls: Whether to enable parallel tool calls. When true, the model can make multiple + tool calls in parallel. + + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Uses Responses API format (with `name`, `description`, `parameters` at top + level). + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help DigitalOcean to + monitor and detect abuse. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + *, + input: Union[str, Iterable[response_create_params.InputUnionMember1]], + model: str, + stream: bool, + instructions: Optional[str] | Omit = omit, + max_output_tokens: Optional[int] | Omit = omit, + max_tokens: Optional[int] | Omit = omit, + metadata: Optional[Dict[str, str]] | Omit = omit, + modalities: Optional[List[Literal["text"]]] | Omit = omit, + parallel_tool_calls: Optional[bool] | Omit = omit, + stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit, + stream_options: Optional[response_create_params.StreamOptions] | Omit = omit, + temperature: Optional[float] | Omit = omit, + tool_choice: response_create_params.ToolChoice | Omit = omit, + tools: Iterable[response_create_params.Tool] | Omit = omit, + top_p: Optional[float] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> CreateResponseResponse | AsyncStream[CreateResponseStreamResponse]: + """Generate text responses from text prompts. + + This endpoint supports both streaming + and non-streaming responses for VLLM models only. + + Args: + input: The input text prompt or conversation history. Can be a string or an array of + message objects for conversation context. + + model: Model ID used to generate the response. Must be a VLLM model. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using server-sent events. + + instructions: System-level instructions for the model. This sets the behavior and context for + the response generation. + + max_output_tokens: Maximum number of tokens to generate in the response. If not specified, the + model will use a default value. + + max_tokens: The maximum number of tokens that can be generated in the completion. Alias for + max_output_tokens for compatibility. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Specifies the output types the model should generate. For text-to-text, this + should be ["text"]. + + parallel_tool_calls: Whether to enable parallel tool calls. When true, the model can make multiple + tool calls in parallel. + + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Uses Responses API format (with `name`, `description`, `parameters` at top + level). + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help DigitalOcean to + monitor and detect abuse. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["input", "model"], ["input", "model", "stream"]) + async def create( + self, + *, + input: Union[str, Iterable[response_create_params.InputUnionMember1]], + model: str, + instructions: Optional[str] | Omit = omit, + max_output_tokens: Optional[int] | Omit = omit, + max_tokens: Optional[int] | Omit = omit, + metadata: Optional[Dict[str, str]] | Omit = omit, + modalities: Optional[List[Literal["text"]]] | Omit = omit, + parallel_tool_calls: Optional[bool] | Omit = omit, + stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit, + stream: Optional[Literal[False]] | Literal[True] | Omit = omit, + stream_options: Optional[response_create_params.StreamOptions] | Omit = omit, + temperature: Optional[float] | Omit = omit, + tool_choice: response_create_params.ToolChoice | Omit = omit, + tools: Iterable[response_create_params.Tool] | Omit = omit, + top_p: Optional[float] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> CreateResponseResponse | AsyncStream[CreateResponseStreamResponse]: + return await self._post( + "/responses" if self._client._base_url_overridden else f"{self._client.inference_endpoint}/v1/responses", + body=await async_maybe_transform( + { + "input": input, + "model": model, + "instructions": instructions, + "max_output_tokens": max_output_tokens, + "max_tokens": max_tokens, + "metadata": metadata, + "modalities": modalities, + "parallel_tool_calls": parallel_tool_calls, + "stop": stop, + "stream": stream, + "stream_options": stream_options, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "top_p": top_p, + "user": user, + }, + response_create_params.ResponseCreateParamsStreaming + if stream + else response_create_params.ResponseCreateParamsNonStreaming, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=CreateResponseResponse, + stream=stream or False, + stream_cls=AsyncStream[CreateResponseStreamResponse], + ) + + +class ResponsesResourceWithRawResponse: + def __init__(self, responses: ResponsesResource) -> None: + self._responses = responses + + self.create = to_raw_response_wrapper( + responses.create, + ) + + +class AsyncResponsesResourceWithRawResponse: + def __init__(self, responses: AsyncResponsesResource) -> None: + self._responses = responses + + self.create = async_to_raw_response_wrapper( + responses.create, + ) + + +class ResponsesResourceWithStreamingResponse: + def __init__(self, responses: ResponsesResource) -> None: + self._responses = responses + + self.create = to_streamed_response_wrapper( + responses.create, + ) + + +class AsyncResponsesResourceWithStreamingResponse: + def __init__(self, responses: AsyncResponsesResource) -> None: + self._responses = responses + + self.create = async_to_streamed_response_wrapper( + responses.create, + ) diff --git 
a/src/gradient/resources/retrieve.py b/src/gradient/resources/retrieve.py index f8335ab3..992fa37f 100644 --- a/src/gradient/resources/retrieve.py +++ b/src/gradient/resources/retrieve.py @@ -67,6 +67,12 @@ def documents( 3. Performs vector similarity search in the knowledge base 4. Returns the most relevant document chunks + The search supports hybrid search combining: + + - Vector similarity (semantic search) + - Keyword matching (BM25) + - Custom metadata filters + Args: num_results: Number of results to return @@ -162,6 +168,12 @@ async def documents( 3. Performs vector similarity search in the knowledge base 4. Returns the most relevant document chunks + The search supports hybrid search combining: + + - Vector similarity (semantic search) + - Keyword matching (BM25) + - Custom metadata filters + Args: num_results: Number of results to return diff --git a/src/gradient/types/__init__.py b/src/gradient/types/__init__.py index e927e099..fc2907b2 100644 --- a/src/gradient/types/__init__.py +++ b/src/gradient/types/__init__.py @@ -43,10 +43,12 @@ ChatCompletionChunk as ChatCompletionChunk, ImageGenStreamEvent as ImageGenStreamEvent, SubscriptionTierBase as SubscriptionTierBase, + CreateResponseResponse as CreateResponseResponse, ImageGenCompletedEvent as ImageGenCompletedEvent, DropletNextBackupWindow as DropletNextBackupWindow, ImageGenPartialImageEvent as ImageGenPartialImageEvent, ChatCompletionTokenLogprob as ChatCompletionTokenLogprob, + CreateResponseStreamResponse as CreateResponseStreamResponse, ) from .api_agent import APIAgent as APIAgent from .api_model import APIModel as APIModel @@ -77,6 +79,7 @@ from .droplet_backup_policy import DropletBackupPolicy as DropletBackupPolicy from .image_generate_params import ImageGenerateParams as ImageGenerateParams from .api_agent_api_key_info import APIAgentAPIKeyInfo as APIAgentAPIKeyInfo +from .response_create_params import ResponseCreateParams as ResponseCreateParams from .agent_retrieve_response import AgentRetrieveResponse as AgentRetrieveResponse from .api_openai_api_key_info import APIOpenAIAPIKeyInfo as APIOpenAIAPIKeyInfo from .gpu_droplet_list_params import GPUDropletListParams as GPUDropletListParams @@ -105,6 +108,7 @@ from .agent_update_status_response import ( AgentUpdateStatusResponse as AgentUpdateStatusResponse, ) +from .billing_list_insights_params import BillingListInsightsParams as BillingListInsightsParams from .knowledge_base_create_params import ( KnowledgeBaseCreateParams as KnowledgeBaseCreateParams, ) @@ -118,6 +122,9 @@ from .gpu_droplet_retrieve_response import ( GPUDropletRetrieveResponse as GPUDropletRetrieveResponse, ) +from .billing_list_insights_response import ( + BillingListInsightsResponse as BillingListInsightsResponse, +) from .knowledge_base_create_response import ( KnowledgeBaseCreateResponse as KnowledgeBaseCreateResponse, ) diff --git a/src/gradient/types/agents/api_evaluation_prompt.py b/src/gradient/types/agents/api_evaluation_prompt.py index 7471e9ae..1bb08bf1 100644 --- a/src/gradient/types/agents/api_evaluation_prompt.py +++ b/src/gradient/types/agents/api_evaluation_prompt.py @@ -1,11 +1,60 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
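# A rough consumption sketch for the new trace-span models defined below;
# `prompt` here is a hypothetical APIEvaluationPrompt instance, not generated code:
#
#     for span in prompt.evaluation_trace_spans or []:
#         print(span.type, span.name, span.created_at)
#         for chunk in span.retriever_chunks or []:
#             print("  used:", chunk.chunk_used, "source:", chunk.source_name)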
from typing import List, Optional
+from datetime import datetime
+from typing_extensions import Literal

 from ..._models import BaseModel
 from .api_evaluation_metric_result import APIEvaluationMetricResult

-__all__ = ["APIEvaluationPrompt", "PromptChunk"]
+__all__ = ["APIEvaluationPrompt", "EvaluationTraceSpan", "EvaluationTraceSpanRetrieverChunk", "PromptChunk"]
+
+
+class EvaluationTraceSpanRetrieverChunk(BaseModel):
+    chunk_usage_pct: Optional[float] = None
+    """The usage percentage of the chunk."""
+
+    chunk_used: Optional[bool] = None
+    """Indicates if the chunk was used in the prompt."""
+
+    index_uuid: Optional[str] = None
+    """The index uuid (Knowledge Base) of the chunk."""
+
+    source_name: Optional[str] = None
+    """The source name for the chunk, e.g., the file name or document title."""
+
+    text: Optional[str] = None
+    """Text content of the chunk."""
+
+
+class EvaluationTraceSpan(BaseModel):
+    """Represents a span within an evaluation trace (e.g., LLM call, tool call, etc.)"""
+
+    created_at: Optional[datetime] = None
+    """When the span was created"""
+
+    input: Optional[object] = None
+    """
+    Input data for the span (flexible structure - can be messages array, string,
+    etc.)
+    """
+
+    name: Optional[str] = None
+    """Name/identifier for the span"""
+
+    output: Optional[object] = None
+    """Output data from the span (flexible structure - can be message, string, etc.)"""
+
+    retriever_chunks: Optional[List[EvaluationTraceSpanRetrieverChunk]] = None
+    """Any retriever span chunks that were included as part of the span."""
+
+    span_level_metric_results: Optional[List[APIEvaluationMetricResult]] = None
+    """The span-level metric results."""
+
+    type: Optional[
+        Literal["TRACE_SPAN_TYPE_UNKNOWN", "TRACE_SPAN_TYPE_LLM", "TRACE_SPAN_TYPE_RETRIEVER", "TRACE_SPAN_TYPE_TOOL"]
+    ] = None
+    """Types of spans in a trace"""


 class PromptChunk(BaseModel):
@@ -26,6 +75,9 @@ class PromptChunk(BaseModel):


 class APIEvaluationPrompt(BaseModel):
+    evaluation_trace_spans: Optional[List[EvaluationTraceSpan]] = None
+    """The evaluated trace spans."""
+
     ground_truth: Optional[str] = None
     """The ground truth for the prompt."""

@@ -47,3 +99,6 @@
     prompt_level_metric_results: Optional[List[APIEvaluationMetricResult]] = None
     """The metric results for the prompt."""
+
+    trace_id: Optional[str] = None
+    """The trace id for the prompt."""
diff --git a/src/gradient/types/agents/api_evaluation_run.py b/src/gradient/types/agents/api_evaluation_run.py
index 5a758898..ed4ef0a1 100644
--- a/src/gradient/types/agents/api_evaluation_run.py
+++ b/src/gradient/types/agents/api_evaluation_run.py
@@ -14,6 +14,9 @@ class APIEvaluationRun(BaseModel):
     agent_deleted: Optional[bool] = None
     """Whether agent is deleted"""

+    agent_deployment_name: Optional[str] = None
+    """The agent deployment name"""
+
     agent_name: Optional[str] = None
     """Agent name"""

diff --git a/src/gradient/types/agents/chat/completion_create_params.py b/src/gradient/types/agents/chat/completion_create_params.py
index d238f8e1..797c6ea3 100644
--- a/src/gradient/types/agents/chat/completion_create_params.py
+++ b/src/gradient/types/agents/chat/completion_create_params.py
@@ -11,20 +11,42 @@
     "CompletionCreateParamsBase",
     "Message",
     "MessageChatCompletionRequestSystemMessage",
+    "MessageChatCompletionRequestSystemMessageContent",
+    "MessageChatCompletionRequestSystemMessageContentChatCompletionRequestContentPartText",
+    "MessageChatCompletionRequestSystemMessageContentChatCompletionRequestContentPartTextCacheControl",
"MessageChatCompletionRequestSystemMessageContentArrayOfContentPart", - "MessageChatCompletionRequestSystemMessageContentArrayOfContentPartUnionMember1", + "MessageChatCompletionRequestSystemMessageContentArrayOfContentPartChatCompletionRequestContentPartText", + "MessageChatCompletionRequestSystemMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl", "MessageChatCompletionRequestDeveloperMessage", + "MessageChatCompletionRequestDeveloperMessageContent", + "MessageChatCompletionRequestDeveloperMessageContentChatCompletionRequestContentPartText", + "MessageChatCompletionRequestDeveloperMessageContentChatCompletionRequestContentPartTextCacheControl", "MessageChatCompletionRequestDeveloperMessageContentArrayOfContentPart", - "MessageChatCompletionRequestDeveloperMessageContentArrayOfContentPartUnionMember1", + "MessageChatCompletionRequestDeveloperMessageContentArrayOfContentPartChatCompletionRequestContentPartText", + "MessageChatCompletionRequestDeveloperMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl", "MessageChatCompletionRequestUserMessage", + "MessageChatCompletionRequestUserMessageContent", + "MessageChatCompletionRequestUserMessageContentChatCompletionRequestContentPartText", + "MessageChatCompletionRequestUserMessageContentChatCompletionRequestContentPartTextCacheControl", "MessageChatCompletionRequestUserMessageContentArrayOfContentPart", - "MessageChatCompletionRequestUserMessageContentArrayOfContentPartUnionMember1", + "MessageChatCompletionRequestUserMessageContentArrayOfContentPartChatCompletionRequestContentPartText", + "MessageChatCompletionRequestUserMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl", "MessageChatCompletionRequestAssistantMessage", + "MessageChatCompletionRequestAssistantMessageContent", + "MessageChatCompletionRequestAssistantMessageContentChatCompletionRequestContentPartText", + "MessageChatCompletionRequestAssistantMessageContentChatCompletionRequestContentPartTextCacheControl", "MessageChatCompletionRequestAssistantMessageContentArrayOfContentPart", - "MessageChatCompletionRequestAssistantMessageContentArrayOfContentPartUnionMember1", + "MessageChatCompletionRequestAssistantMessageContentArrayOfContentPartChatCompletionRequestContentPartText", + "MessageChatCompletionRequestAssistantMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl", "MessageChatCompletionRequestAssistantMessageToolCall", "MessageChatCompletionRequestAssistantMessageToolCallFunction", "MessageChatCompletionRequestToolMessage", + "MessageChatCompletionRequestToolMessageContent", + "MessageChatCompletionRequestToolMessageContentChatCompletionRequestContentPartText", + "MessageChatCompletionRequestToolMessageContentChatCompletionRequestContentPartTextCacheControl", + "MessageChatCompletionRequestToolMessageContentArrayOfContentPart", + "MessageChatCompletionRequestToolMessageContentArrayOfContentPartChatCompletionRequestContentPartText", + "MessageChatCompletionRequestToolMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl", "StreamOptions", "ToolChoice", "ToolChoiceChatCompletionNamedToolChoice", @@ -165,7 +187,46 @@ class CompletionCreateParamsBase(TypedDict, total=False): """ -class MessageChatCompletionRequestSystemMessageContentArrayOfContentPartUnionMember1(TypedDict, total=False): +class MessageChatCompletionRequestSystemMessageContentChatCompletionRequestContentPartTextCacheControl( + TypedDict, total=False +): + """Cache control 
settings for the content part.""" + + type: Required[Literal["ephemeral"]] + """The cache control type.""" + + ttl: Literal["5m", "1h"] + """The cache TTL.""" + + +class MessageChatCompletionRequestSystemMessageContentChatCompletionRequestContentPartText(TypedDict, total=False): + """Content part with type and text""" + + text: Required[str] + """The text content""" + + type: Required[Literal["text"]] + """The type of content part""" + + cache_control: MessageChatCompletionRequestSystemMessageContentChatCompletionRequestContentPartTextCacheControl + """Cache control settings for the content part.""" + + +class MessageChatCompletionRequestSystemMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl( + TypedDict, total=False +): + """Cache control settings for the content part.""" + + type: Required[Literal["ephemeral"]] + """The cache control type.""" + + ttl: Literal["5m", "1h"] + """The cache TTL.""" + + +class MessageChatCompletionRequestSystemMessageContentArrayOfContentPartChatCompletionRequestContentPartText( + TypedDict, total=False +): """Content part with type and text""" text: Required[str] @@ -174,9 +235,18 @@ class MessageChatCompletionRequestSystemMessageContentArrayOfContentPartUnionMem type: Required[Literal["text"]] """The type of content part""" + cache_control: MessageChatCompletionRequestSystemMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl + """Cache control settings for the content part.""" + MessageChatCompletionRequestSystemMessageContentArrayOfContentPart: TypeAlias = Union[ - str, MessageChatCompletionRequestSystemMessageContentArrayOfContentPartUnionMember1 + str, MessageChatCompletionRequestSystemMessageContentArrayOfContentPartChatCompletionRequestContentPartText +] + +MessageChatCompletionRequestSystemMessageContent: TypeAlias = Union[ + str, + MessageChatCompletionRequestSystemMessageContentChatCompletionRequestContentPartText, + SequenceNotStr[MessageChatCompletionRequestSystemMessageContentArrayOfContentPart], ] @@ -186,14 +256,26 @@ class MessageChatCompletionRequestSystemMessage(TypedDict, total=False): messages sent by the user. 
""" - content: Required[Union[str, SequenceNotStr[MessageChatCompletionRequestSystemMessageContentArrayOfContentPart]]] + content: Required[MessageChatCompletionRequestSystemMessageContent] """The contents of the system message.""" role: Required[Literal["system"]] """The role of the messages author, in this case `system`.""" -class MessageChatCompletionRequestDeveloperMessageContentArrayOfContentPartUnionMember1(TypedDict, total=False): +class MessageChatCompletionRequestDeveloperMessageContentChatCompletionRequestContentPartTextCacheControl( + TypedDict, total=False +): + """Cache control settings for the content part.""" + + type: Required[Literal["ephemeral"]] + """The cache control type.""" + + ttl: Literal["5m", "1h"] + """The cache TTL.""" + + +class MessageChatCompletionRequestDeveloperMessageContentChatCompletionRequestContentPartText(TypedDict, total=False): """Content part with type and text""" text: Required[str] @@ -202,9 +284,45 @@ class MessageChatCompletionRequestDeveloperMessageContentArrayOfContentPartUnion type: Required[Literal["text"]] """The type of content part""" + cache_control: MessageChatCompletionRequestDeveloperMessageContentChatCompletionRequestContentPartTextCacheControl + """Cache control settings for the content part.""" + + +class MessageChatCompletionRequestDeveloperMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl( + TypedDict, total=False +): + """Cache control settings for the content part.""" + + type: Required[Literal["ephemeral"]] + """The cache control type.""" + + ttl: Literal["5m", "1h"] + """The cache TTL.""" + + +class MessageChatCompletionRequestDeveloperMessageContentArrayOfContentPartChatCompletionRequestContentPartText( + TypedDict, total=False +): + """Content part with type and text""" + + text: Required[str] + """The text content""" + + type: Required[Literal["text"]] + """The type of content part""" + + cache_control: MessageChatCompletionRequestDeveloperMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl + """Cache control settings for the content part.""" + MessageChatCompletionRequestDeveloperMessageContentArrayOfContentPart: TypeAlias = Union[ - str, MessageChatCompletionRequestDeveloperMessageContentArrayOfContentPartUnionMember1 + str, MessageChatCompletionRequestDeveloperMessageContentArrayOfContentPartChatCompletionRequestContentPartText +] + +MessageChatCompletionRequestDeveloperMessageContent: TypeAlias = Union[ + str, + MessageChatCompletionRequestDeveloperMessageContentChatCompletionRequestContentPartText, + SequenceNotStr[MessageChatCompletionRequestDeveloperMessageContentArrayOfContentPart], ] @@ -214,14 +332,26 @@ class MessageChatCompletionRequestDeveloperMessage(TypedDict, total=False): messages sent by the user. 
""" - content: Required[Union[str, SequenceNotStr[MessageChatCompletionRequestDeveloperMessageContentArrayOfContentPart]]] + content: Required[MessageChatCompletionRequestDeveloperMessageContent] """The contents of the developer message.""" role: Required[Literal["developer"]] """The role of the messages author, in this case `developer`.""" -class MessageChatCompletionRequestUserMessageContentArrayOfContentPartUnionMember1(TypedDict, total=False): +class MessageChatCompletionRequestUserMessageContentChatCompletionRequestContentPartTextCacheControl( + TypedDict, total=False +): + """Cache control settings for the content part.""" + + type: Required[Literal["ephemeral"]] + """The cache control type.""" + + ttl: Literal["5m", "1h"] + """The cache TTL.""" + + +class MessageChatCompletionRequestUserMessageContentChatCompletionRequestContentPartText(TypedDict, total=False): """Content part with type and text""" text: Required[str] @@ -230,9 +360,47 @@ class MessageChatCompletionRequestUserMessageContentArrayOfContentPartUnionMembe type: Required[Literal["text"]] """The type of content part""" + cache_control: MessageChatCompletionRequestUserMessageContentChatCompletionRequestContentPartTextCacheControl + """Cache control settings for the content part.""" + + +class MessageChatCompletionRequestUserMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl( + TypedDict, total=False +): + """Cache control settings for the content part.""" + + type: Required[Literal["ephemeral"]] + """The cache control type.""" + + ttl: Literal["5m", "1h"] + """The cache TTL.""" + + +class MessageChatCompletionRequestUserMessageContentArrayOfContentPartChatCompletionRequestContentPartText( + TypedDict, total=False +): + """Content part with type and text""" + + text: Required[str] + """The text content""" + + type: Required[Literal["text"]] + """The type of content part""" + + cache_control: ( + MessageChatCompletionRequestUserMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl + ) + """Cache control settings for the content part.""" + MessageChatCompletionRequestUserMessageContentArrayOfContentPart: TypeAlias = Union[ - str, MessageChatCompletionRequestUserMessageContentArrayOfContentPartUnionMember1 + str, MessageChatCompletionRequestUserMessageContentArrayOfContentPartChatCompletionRequestContentPartText +] + +MessageChatCompletionRequestUserMessageContent: TypeAlias = Union[ + str, + MessageChatCompletionRequestUserMessageContentChatCompletionRequestContentPartText, + SequenceNotStr[MessageChatCompletionRequestUserMessageContentArrayOfContentPart], ] @@ -242,14 +410,26 @@ class MessageChatCompletionRequestUserMessage(TypedDict, total=False): information. 
""" - content: Required[Union[str, SequenceNotStr[MessageChatCompletionRequestUserMessageContentArrayOfContentPart]]] + content: Required[MessageChatCompletionRequestUserMessageContent] """The contents of the user message.""" role: Required[Literal["user"]] """The role of the messages author, in this case `user`.""" -class MessageChatCompletionRequestAssistantMessageContentArrayOfContentPartUnionMember1(TypedDict, total=False): +class MessageChatCompletionRequestAssistantMessageContentChatCompletionRequestContentPartTextCacheControl( + TypedDict, total=False +): + """Cache control settings for the content part.""" + + type: Required[Literal["ephemeral"]] + """The cache control type.""" + + ttl: Literal["5m", "1h"] + """The cache TTL.""" + + +class MessageChatCompletionRequestAssistantMessageContentChatCompletionRequestContentPartText(TypedDict, total=False): """Content part with type and text""" text: Required[str] @@ -258,9 +438,45 @@ class MessageChatCompletionRequestAssistantMessageContentArrayOfContentPartUnion type: Required[Literal["text"]] """The type of content part""" + cache_control: MessageChatCompletionRequestAssistantMessageContentChatCompletionRequestContentPartTextCacheControl + """Cache control settings for the content part.""" + + +class MessageChatCompletionRequestAssistantMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl( + TypedDict, total=False +): + """Cache control settings for the content part.""" + + type: Required[Literal["ephemeral"]] + """The cache control type.""" + + ttl: Literal["5m", "1h"] + """The cache TTL.""" + + +class MessageChatCompletionRequestAssistantMessageContentArrayOfContentPartChatCompletionRequestContentPartText( + TypedDict, total=False +): + """Content part with type and text""" + + text: Required[str] + """The text content""" + + type: Required[Literal["text"]] + """The type of content part""" + + cache_control: MessageChatCompletionRequestAssistantMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl + """Cache control settings for the content part.""" + MessageChatCompletionRequestAssistantMessageContentArrayOfContentPart: TypeAlias = Union[ - str, MessageChatCompletionRequestAssistantMessageContentArrayOfContentPartUnionMember1 + str, MessageChatCompletionRequestAssistantMessageContentArrayOfContentPartChatCompletionRequestContentPartText +] + +MessageChatCompletionRequestAssistantMessageContent: TypeAlias = Union[ + str, + MessageChatCompletionRequestAssistantMessageContentChatCompletionRequestContentPartText, + SequenceNotStr[MessageChatCompletionRequestAssistantMessageContentArrayOfContentPart], ] @@ -296,15 +512,80 @@ class MessageChatCompletionRequestAssistantMessage(TypedDict, total=False): role: Required[Literal["assistant"]] """The role of the messages author, in this case `assistant`.""" - content: Union[str, SequenceNotStr[MessageChatCompletionRequestAssistantMessageContentArrayOfContentPart], None] + content: Optional[MessageChatCompletionRequestAssistantMessageContent] """The contents of the assistant message.""" tool_calls: Iterable[MessageChatCompletionRequestAssistantMessageToolCall] """The tool calls generated by the model, such as function calls.""" +class MessageChatCompletionRequestToolMessageContentChatCompletionRequestContentPartTextCacheControl( + TypedDict, total=False +): + """Cache control settings for the content part.""" + + type: Required[Literal["ephemeral"]] + """The cache control type.""" + + ttl: Literal["5m", "1h"] + """The cache TTL.""" + + +class 
MessageChatCompletionRequestToolMessageContentChatCompletionRequestContentPartText(TypedDict, total=False): + """Content part with type and text""" + + text: Required[str] + """The text content""" + + type: Required[Literal["text"]] + """The type of content part""" + + cache_control: MessageChatCompletionRequestToolMessageContentChatCompletionRequestContentPartTextCacheControl + """Cache control settings for the content part.""" + + +class MessageChatCompletionRequestToolMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl( + TypedDict, total=False +): + """Cache control settings for the content part.""" + + type: Required[Literal["ephemeral"]] + """The cache control type.""" + + ttl: Literal["5m", "1h"] + """The cache TTL.""" + + +class MessageChatCompletionRequestToolMessageContentArrayOfContentPartChatCompletionRequestContentPartText( + TypedDict, total=False +): + """Content part with type and text""" + + text: Required[str] + """The text content""" + + type: Required[Literal["text"]] + """The type of content part""" + + cache_control: ( + MessageChatCompletionRequestToolMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl + ) + """Cache control settings for the content part.""" + + +MessageChatCompletionRequestToolMessageContentArrayOfContentPart: TypeAlias = Union[ + str, MessageChatCompletionRequestToolMessageContentArrayOfContentPartChatCompletionRequestContentPartText +] + +MessageChatCompletionRequestToolMessageContent: TypeAlias = Union[ + str, + MessageChatCompletionRequestToolMessageContentChatCompletionRequestContentPartText, + SequenceNotStr[MessageChatCompletionRequestToolMessageContentArrayOfContentPart], +] + + class MessageChatCompletionRequestToolMessage(TypedDict, total=False): - content: Required[str] + content: Required[MessageChatCompletionRequestToolMessageContent] """The contents of the tool message.""" role: Required[Literal["tool"]] diff --git a/src/gradient/types/agents/evaluation_dataset_create_params.py b/src/gradient/types/agents/evaluation_dataset_create_params.py index c8a84c23..661a42da 100644 --- a/src/gradient/types/agents/evaluation_dataset_create_params.py +++ b/src/gradient/types/agents/evaluation_dataset_create_params.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing_extensions import TypedDict +from typing_extensions import Literal, TypedDict from ..knowledge_bases.api_file_upload_data_source_param import APIFileUploadDataSourceParam @@ -10,6 +10,10 @@ class EvaluationDatasetCreateParams(TypedDict, total=False): + dataset_type: Literal[ + "EVALUATION_DATASET_TYPE_UNKNOWN", "EVALUATION_DATASET_TYPE_ADK", "EVALUATION_DATASET_TYPE_NON_ADK" + ] + file_upload_dataset: APIFileUploadDataSourceParam """File to upload as data source for knowledge base.""" diff --git a/src/gradient/types/agents/evaluation_run_create_params.py b/src/gradient/types/agents/evaluation_run_create_params.py index 52bbee85..b5e60803 100644 --- a/src/gradient/types/agents/evaluation_run_create_params.py +++ b/src/gradient/types/agents/evaluation_run_create_params.py @@ -10,8 +10,11 @@ class EvaluationRunCreateParams(TypedDict, total=False): + agent_deployment_names: SequenceNotStr[str] + """Agent deployment names to run the test case against (ADK agent workspaces).""" + agent_uuids: SequenceNotStr[str] - """Agent UUIDs to run the test case against.""" + """Agent UUIDs to run the test case against (legacy agents).""" run_name: str """The name of the run.""" diff --git 
a/src/gradient/types/agents/evaluation_test_case_create_params.py b/src/gradient/types/agents/evaluation_test_case_create_params.py
index af49d024..ff0666b9 100644
--- a/src/gradient/types/agents/evaluation_test_case_create_params.py
+++ b/src/gradient/types/agents/evaluation_test_case_create_params.py
@@ -11,6 +11,8 @@ class EvaluationTestCaseCreateParams(TypedDict, total=False):
+    agent_workspace_name: str
+
     dataset_uuid: str
     """Dataset against which the test case is executed."""

diff --git a/src/gradient/types/api_agent_model.py b/src/gradient/types/api_agent_model.py
index e42bb5d5..a6a36c6f 100644
--- a/src/gradient/types/api_agent_model.py
+++ b/src/gradient/types/api_agent_model.py
@@ -29,6 +29,15 @@ class APIAgentModel(BaseModel):
     is_foundational: Optional[bool] = None
     """True if it is a foundational model provided by do"""

+    kb_default_chunk_size: Optional[int] = None
+    """Default chunking size limit to show in UI"""
+
+    kb_max_chunk_size: Optional[int] = None
+    """Maximum chunk size limit of model"""
+
+    kb_min_chunk_size: Optional[int] = None
+    """Minimum chunking size token limits if the model supports the KNOWLEDGEBASE use case"""
+
     metadata: Optional[object] = None
     """Additional meta data"""

diff --git a/src/gradient/types/api_model.py b/src/gradient/types/api_model.py
index 83b1b66a..1d9752e4 100644
--- a/src/gradient/types/api_model.py
+++ b/src/gradient/types/api_model.py
@@ -25,6 +25,15 @@ class APIModel(BaseModel):
     is_foundational: Optional[bool] = None
     """True if it is a foundational model provided by do"""

+    kb_default_chunk_size: Optional[int] = None
+    """Default chunking size limit to show in UI"""
+
+    kb_max_chunk_size: Optional[int] = None
+    """Maximum chunk size limit of model"""
+
+    kb_min_chunk_size: Optional[int] = None
+    """Minimum chunking size token limits if the model supports the KNOWLEDGEBASE use case"""
+
     name: Optional[str] = None
     """Display name of the model"""

diff --git a/src/gradient/types/apps/__init__.py b/src/gradient/types/apps/__init__.py
new file mode 100644
index 00000000..b4a2e426
--- /dev/null
+++ b/src/gradient/types/apps/__init__.py
@@ -0,0 +1,6 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .job_invocation_cancel_params import JobInvocationCancelParams as JobInvocationCancelParams
+from .job_invocation_cancel_response import JobInvocationCancelResponse as JobInvocationCancelResponse
diff --git a/src/gradient/types/apps/job_invocation_cancel_params.py b/src/gradient/types/apps/job_invocation_cancel_params.py
new file mode 100644
index 00000000..b026fcb9
--- /dev/null
+++ b/src/gradient/types/apps/job_invocation_cancel_params.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+__all__ = ["JobInvocationCancelParams"]
+
+
+class JobInvocationCancelParams(TypedDict, total=False):
+    app_id: Required[str]
+
+    job_name: str
+    """The job name of the job invocation to cancel."""
diff --git a/src/gradient/types/apps/job_invocation_cancel_response.py b/src/gradient/types/apps/job_invocation_cancel_response.py
new file mode 100644
index 00000000..96b2c642
--- /dev/null
+++ b/src/gradient/types/apps/job_invocation_cancel_response.py
@@ -0,0 +1,77 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
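+# A brief usage sketch for the cancel call these models describe (the IDs are
+# placeholders; the method shape follows the api.md listing in this change):
+#
+#     invocation = client.apps.job_invocations.cancel(
+#         "inv-123",          # job_invocation_id
+#         app_id="app-456",
+#     )
+#     print(invocation.phase)  # e.g. "CANCELED" once the cancellation is processed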
+ +from typing import Optional +from datetime import datetime +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = [ + "JobInvocationCancelResponse", + "Trigger", + "TriggerManual", + "TriggerManualUser", + "TriggerScheduled", + "TriggerScheduledSchedule", +] + + +class TriggerManualUser(BaseModel): + """The user who triggered the job""" + + email: Optional[str] = None + + full_name: Optional[str] = None + + uuid: Optional[str] = None + + +class TriggerManual(BaseModel): + """Details about the manual trigger, if applicable""" + + user: Optional[TriggerManualUser] = None + """The user who triggered the job""" + + +class TriggerScheduledSchedule(BaseModel): + cron: Optional[str] = None + """The cron expression defining the schedule""" + + time_zone: Optional[str] = None + """The time zone for the schedule""" + + +class TriggerScheduled(BaseModel): + """The schedule for the job""" + + schedule: Optional[TriggerScheduledSchedule] = None + + +class Trigger(BaseModel): + manual: Optional[TriggerManual] = None + """Details about the manual trigger, if applicable""" + + scheduled: Optional[TriggerScheduled] = None + """The schedule for the job""" + + type: Optional[Literal["MANUAL", "SCHEDULE", "UNKNOWN"]] = None + """The type of trigger that initiated the job invocation.""" + + +class JobInvocationCancelResponse(BaseModel): + id: Optional[str] = None + + completed_at: Optional[datetime] = None + + created_at: Optional[datetime] = None + + deployment_id: Optional[str] = None + + job_name: Optional[str] = None + + phase: Optional[Literal["UNKNOWN", "PENDING", "RUNNING", "SUCCEEDED", "FAILED", "CANCELED", "SKIPPED"]] = None + """The phase of the job invocation""" + + started_at: Optional[datetime] = None + + trigger: Optional[Trigger] = None diff --git a/src/gradient/types/billing_list_insights_params.py b/src/gradient/types/billing_list_insights_params.py new file mode 100644 index 00000000..6e89dd1e --- /dev/null +++ b/src/gradient/types/billing_list_insights_params.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from datetime import date +from typing_extensions import Required, Annotated, TypedDict + +from .._utils import PropertyInfo + +__all__ = ["BillingListInsightsParams"] + + +class BillingListInsightsParams(TypedDict, total=False): + account_urn: Required[str] + + start_date: Required[Annotated[Union[str, date], PropertyInfo(format="iso8601")]] + + page: int + """Which 'page' of paginated results to return.""" + + per_page: int + """Number of items returned per page""" diff --git a/src/gradient/types/billing_list_insights_response.py b/src/gradient/types/billing_list_insights_response.py new file mode 100644 index 00000000..f7515a6b --- /dev/null +++ b/src/gradient/types/billing_list_insights_response.py @@ -0,0 +1,51 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
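+# A hedged pagination sketch over the models below (the URN and dates are
+# placeholders; the argument order is assumed from the api.md listing in this change):
+#
+#     page = 1
+#     while True:
+#         resp = client.billing.list_insights(
+#             "2026-01-31", account_urn="do:team:example",
+#             start_date="2026-01-01", page=page,
+#         )
+#         for point in resp.data_points:
+#             print(point.start_date, point.sku, point.total_amount)
+#         if page >= resp.total_pages:
+#             break
+#         page += 1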
+
+from typing import List, Optional
+from datetime import date
+
+from .._models import BaseModel
+
+__all__ = ["BillingListInsightsResponse", "DataPoint"]
+
+
+class DataPoint(BaseModel):
+    description: Optional[str] = None
+    """Description of the billed resource or service as shown on an invoice item"""
+
+    group_description: Optional[str] = None
+    """
+    Optional invoice item group name of the billed resource or service, blank when
+    not part of an invoice item group
+    """
+
+    region: Optional[str] = None
+    """Region where the usage occurred"""
+
+    sku: Optional[str] = None
+    """Unique SKU identifier for the billed resource"""
+
+    start_date: Optional[date] = None
+    """Start date of the billing data point in YYYY-MM-DD format"""
+
+    total_amount: Optional[str] = None
+    """Total amount for this data point in USD"""
+
+    usage_team_urn: Optional[str] = None
+    """URN of the team that incurred the usage"""
+
+
+class BillingListInsightsResponse(BaseModel):
+    current_page: int
+    """Current page number"""
+
+    data_points: List[DataPoint]
+    """
+    Array of billing data points, which are day-over-day changes in billing resource
+    usage based on nightly invoice item estimates, for the requested period
+    """
+
+    total_items: int
+    """Total number of items available across all pages"""
+
+    total_pages: int
+    """Total number of pages available"""
diff --git a/src/gradient/types/chat/completion_create_params.py b/src/gradient/types/chat/completion_create_params.py
index e889c5e8..bf5bd49d 100644
--- a/src/gradient/types/chat/completion_create_params.py
+++ b/src/gradient/types/chat/completion_create_params.py
@@ -11,20 +11,42 @@
     "CompletionCreateParamsBase",
     "Message",
     "MessageChatCompletionRequestSystemMessage",
+    "MessageChatCompletionRequestSystemMessageContent",
+    "MessageChatCompletionRequestSystemMessageContentChatCompletionRequestContentPartText",
+    "MessageChatCompletionRequestSystemMessageContentChatCompletionRequestContentPartTextCacheControl",
     "MessageChatCompletionRequestSystemMessageContentArrayOfContentPart",
-    "MessageChatCompletionRequestSystemMessageContentArrayOfContentPartUnionMember1",
+    "MessageChatCompletionRequestSystemMessageContentArrayOfContentPartChatCompletionRequestContentPartText",
+    "MessageChatCompletionRequestSystemMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl",
     "MessageChatCompletionRequestDeveloperMessage",
+    "MessageChatCompletionRequestDeveloperMessageContent",
+    "MessageChatCompletionRequestDeveloperMessageContentChatCompletionRequestContentPartText",
+    "MessageChatCompletionRequestDeveloperMessageContentChatCompletionRequestContentPartTextCacheControl",
     "MessageChatCompletionRequestDeveloperMessageContentArrayOfContentPart",
-    "MessageChatCompletionRequestDeveloperMessageContentArrayOfContentPartUnionMember1",
+    "MessageChatCompletionRequestDeveloperMessageContentArrayOfContentPartChatCompletionRequestContentPartText",
+    "MessageChatCompletionRequestDeveloperMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl",
     "MessageChatCompletionRequestUserMessage",
+    "MessageChatCompletionRequestUserMessageContent",
+    "MessageChatCompletionRequestUserMessageContentChatCompletionRequestContentPartText",
+    "MessageChatCompletionRequestUserMessageContentChatCompletionRequestContentPartTextCacheControl",
     "MessageChatCompletionRequestUserMessageContentArrayOfContentPart",
-    "MessageChatCompletionRequestUserMessageContentArrayOfContentPartUnionMember1",
+
"MessageChatCompletionRequestUserMessageContentArrayOfContentPartChatCompletionRequestContentPartText", + "MessageChatCompletionRequestUserMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl", "MessageChatCompletionRequestAssistantMessage", + "MessageChatCompletionRequestAssistantMessageContent", + "MessageChatCompletionRequestAssistantMessageContentChatCompletionRequestContentPartText", + "MessageChatCompletionRequestAssistantMessageContentChatCompletionRequestContentPartTextCacheControl", "MessageChatCompletionRequestAssistantMessageContentArrayOfContentPart", - "MessageChatCompletionRequestAssistantMessageContentArrayOfContentPartUnionMember1", + "MessageChatCompletionRequestAssistantMessageContentArrayOfContentPartChatCompletionRequestContentPartText", + "MessageChatCompletionRequestAssistantMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl", "MessageChatCompletionRequestAssistantMessageToolCall", "MessageChatCompletionRequestAssistantMessageToolCallFunction", "MessageChatCompletionRequestToolMessage", + "MessageChatCompletionRequestToolMessageContent", + "MessageChatCompletionRequestToolMessageContentChatCompletionRequestContentPartText", + "MessageChatCompletionRequestToolMessageContentChatCompletionRequestContentPartTextCacheControl", + "MessageChatCompletionRequestToolMessageContentArrayOfContentPart", + "MessageChatCompletionRequestToolMessageContentArrayOfContentPartChatCompletionRequestContentPartText", + "MessageChatCompletionRequestToolMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl", "StreamOptions", "ToolChoice", "ToolChoiceChatCompletionNamedToolChoice", @@ -165,7 +187,46 @@ class CompletionCreateParamsBase(TypedDict, total=False): """ -class MessageChatCompletionRequestSystemMessageContentArrayOfContentPartUnionMember1(TypedDict, total=False): +class MessageChatCompletionRequestSystemMessageContentChatCompletionRequestContentPartTextCacheControl( + TypedDict, total=False +): + """Cache control settings for the content part.""" + + type: Required[Literal["ephemeral"]] + """The cache control type.""" + + ttl: Literal["5m", "1h"] + """The cache TTL.""" + + +class MessageChatCompletionRequestSystemMessageContentChatCompletionRequestContentPartText(TypedDict, total=False): + """Content part with type and text""" + + text: Required[str] + """The text content""" + + type: Required[Literal["text"]] + """The type of content part""" + + cache_control: MessageChatCompletionRequestSystemMessageContentChatCompletionRequestContentPartTextCacheControl + """Cache control settings for the content part.""" + + +class MessageChatCompletionRequestSystemMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl( + TypedDict, total=False +): + """Cache control settings for the content part.""" + + type: Required[Literal["ephemeral"]] + """The cache control type.""" + + ttl: Literal["5m", "1h"] + """The cache TTL.""" + + +class MessageChatCompletionRequestSystemMessageContentArrayOfContentPartChatCompletionRequestContentPartText( + TypedDict, total=False +): """Content part with type and text""" text: Required[str] @@ -174,9 +235,18 @@ class MessageChatCompletionRequestSystemMessageContentArrayOfContentPartUnionMem type: Required[Literal["text"]] """The type of content part""" + cache_control: MessageChatCompletionRequestSystemMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl + """Cache control settings for the content part.""" + 
MessageChatCompletionRequestSystemMessageContentArrayOfContentPart: TypeAlias = Union[ - str, MessageChatCompletionRequestSystemMessageContentArrayOfContentPartUnionMember1 + str, MessageChatCompletionRequestSystemMessageContentArrayOfContentPartChatCompletionRequestContentPartText +] + +MessageChatCompletionRequestSystemMessageContent: TypeAlias = Union[ + str, + MessageChatCompletionRequestSystemMessageContentChatCompletionRequestContentPartText, + SequenceNotStr[MessageChatCompletionRequestSystemMessageContentArrayOfContentPart], ] @@ -186,14 +256,26 @@ class MessageChatCompletionRequestSystemMessage(TypedDict, total=False): messages sent by the user. """ - content: Required[Union[str, SequenceNotStr[MessageChatCompletionRequestSystemMessageContentArrayOfContentPart]]] + content: Required[MessageChatCompletionRequestSystemMessageContent] """The contents of the system message.""" role: Required[Literal["system"]] """The role of the messages author, in this case `system`.""" -class MessageChatCompletionRequestDeveloperMessageContentArrayOfContentPartUnionMember1(TypedDict, total=False): +class MessageChatCompletionRequestDeveloperMessageContentChatCompletionRequestContentPartTextCacheControl( + TypedDict, total=False +): + """Cache control settings for the content part.""" + + type: Required[Literal["ephemeral"]] + """The cache control type.""" + + ttl: Literal["5m", "1h"] + """The cache TTL.""" + + +class MessageChatCompletionRequestDeveloperMessageContentChatCompletionRequestContentPartText(TypedDict, total=False): """Content part with type and text""" text: Required[str] @@ -202,9 +284,45 @@ class MessageChatCompletionRequestDeveloperMessageContentArrayOfContentPartUnion type: Required[Literal["text"]] """The type of content part""" + cache_control: MessageChatCompletionRequestDeveloperMessageContentChatCompletionRequestContentPartTextCacheControl + """Cache control settings for the content part.""" + + +class MessageChatCompletionRequestDeveloperMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl( + TypedDict, total=False +): + """Cache control settings for the content part.""" + + type: Required[Literal["ephemeral"]] + """The cache control type.""" + + ttl: Literal["5m", "1h"] + """The cache TTL.""" + + +class MessageChatCompletionRequestDeveloperMessageContentArrayOfContentPartChatCompletionRequestContentPartText( + TypedDict, total=False +): + """Content part with type and text""" + + text: Required[str] + """The text content""" + + type: Required[Literal["text"]] + """The type of content part""" + + cache_control: MessageChatCompletionRequestDeveloperMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl + """Cache control settings for the content part.""" + MessageChatCompletionRequestDeveloperMessageContentArrayOfContentPart: TypeAlias = Union[ - str, MessageChatCompletionRequestDeveloperMessageContentArrayOfContentPartUnionMember1 + str, MessageChatCompletionRequestDeveloperMessageContentArrayOfContentPartChatCompletionRequestContentPartText +] + +MessageChatCompletionRequestDeveloperMessageContent: TypeAlias = Union[ + str, + MessageChatCompletionRequestDeveloperMessageContentChatCompletionRequestContentPartText, + SequenceNotStr[MessageChatCompletionRequestDeveloperMessageContentArrayOfContentPart], ] @@ -214,14 +332,26 @@ class MessageChatCompletionRequestDeveloperMessage(TypedDict, total=False): messages sent by the user. 
""" - content: Required[Union[str, SequenceNotStr[MessageChatCompletionRequestDeveloperMessageContentArrayOfContentPart]]] + content: Required[MessageChatCompletionRequestDeveloperMessageContent] """The contents of the developer message.""" role: Required[Literal["developer"]] """The role of the messages author, in this case `developer`.""" -class MessageChatCompletionRequestUserMessageContentArrayOfContentPartUnionMember1(TypedDict, total=False): +class MessageChatCompletionRequestUserMessageContentChatCompletionRequestContentPartTextCacheControl( + TypedDict, total=False +): + """Cache control settings for the content part.""" + + type: Required[Literal["ephemeral"]] + """The cache control type.""" + + ttl: Literal["5m", "1h"] + """The cache TTL.""" + + +class MessageChatCompletionRequestUserMessageContentChatCompletionRequestContentPartText(TypedDict, total=False): """Content part with type and text""" text: Required[str] @@ -230,9 +360,47 @@ class MessageChatCompletionRequestUserMessageContentArrayOfContentPartUnionMembe type: Required[Literal["text"]] """The type of content part""" + cache_control: MessageChatCompletionRequestUserMessageContentChatCompletionRequestContentPartTextCacheControl + """Cache control settings for the content part.""" + + +class MessageChatCompletionRequestUserMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl( + TypedDict, total=False +): + """Cache control settings for the content part.""" + + type: Required[Literal["ephemeral"]] + """The cache control type.""" + + ttl: Literal["5m", "1h"] + """The cache TTL.""" + + +class MessageChatCompletionRequestUserMessageContentArrayOfContentPartChatCompletionRequestContentPartText( + TypedDict, total=False +): + """Content part with type and text""" + + text: Required[str] + """The text content""" + + type: Required[Literal["text"]] + """The type of content part""" + + cache_control: ( + MessageChatCompletionRequestUserMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl + ) + """Cache control settings for the content part.""" + MessageChatCompletionRequestUserMessageContentArrayOfContentPart: TypeAlias = Union[ - str, MessageChatCompletionRequestUserMessageContentArrayOfContentPartUnionMember1 + str, MessageChatCompletionRequestUserMessageContentArrayOfContentPartChatCompletionRequestContentPartText +] + +MessageChatCompletionRequestUserMessageContent: TypeAlias = Union[ + str, + MessageChatCompletionRequestUserMessageContentChatCompletionRequestContentPartText, + SequenceNotStr[MessageChatCompletionRequestUserMessageContentArrayOfContentPart], ] @@ -242,14 +410,26 @@ class MessageChatCompletionRequestUserMessage(TypedDict, total=False): information. 
""" - content: Required[Union[str, SequenceNotStr[MessageChatCompletionRequestUserMessageContentArrayOfContentPart]]] + content: Required[MessageChatCompletionRequestUserMessageContent] """The contents of the user message.""" role: Required[Literal["user"]] """The role of the messages author, in this case `user`.""" -class MessageChatCompletionRequestAssistantMessageContentArrayOfContentPartUnionMember1(TypedDict, total=False): +class MessageChatCompletionRequestAssistantMessageContentChatCompletionRequestContentPartTextCacheControl( + TypedDict, total=False +): + """Cache control settings for the content part.""" + + type: Required[Literal["ephemeral"]] + """The cache control type.""" + + ttl: Literal["5m", "1h"] + """The cache TTL.""" + + +class MessageChatCompletionRequestAssistantMessageContentChatCompletionRequestContentPartText(TypedDict, total=False): """Content part with type and text""" text: Required[str] @@ -258,9 +438,45 @@ class MessageChatCompletionRequestAssistantMessageContentArrayOfContentPartUnion type: Required[Literal["text"]] """The type of content part""" + cache_control: MessageChatCompletionRequestAssistantMessageContentChatCompletionRequestContentPartTextCacheControl + """Cache control settings for the content part.""" + + +class MessageChatCompletionRequestAssistantMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl( + TypedDict, total=False +): + """Cache control settings for the content part.""" + + type: Required[Literal["ephemeral"]] + """The cache control type.""" + + ttl: Literal["5m", "1h"] + """The cache TTL.""" + + +class MessageChatCompletionRequestAssistantMessageContentArrayOfContentPartChatCompletionRequestContentPartText( + TypedDict, total=False +): + """Content part with type and text""" + + text: Required[str] + """The text content""" + + type: Required[Literal["text"]] + """The type of content part""" + + cache_control: MessageChatCompletionRequestAssistantMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl + """Cache control settings for the content part.""" + MessageChatCompletionRequestAssistantMessageContentArrayOfContentPart: TypeAlias = Union[ - str, MessageChatCompletionRequestAssistantMessageContentArrayOfContentPartUnionMember1 + str, MessageChatCompletionRequestAssistantMessageContentArrayOfContentPartChatCompletionRequestContentPartText +] + +MessageChatCompletionRequestAssistantMessageContent: TypeAlias = Union[ + str, + MessageChatCompletionRequestAssistantMessageContentChatCompletionRequestContentPartText, + SequenceNotStr[MessageChatCompletionRequestAssistantMessageContentArrayOfContentPart], ] @@ -296,15 +512,80 @@ class MessageChatCompletionRequestAssistantMessage(TypedDict, total=False): role: Required[Literal["assistant"]] """The role of the messages author, in this case `assistant`.""" - content: Union[str, SequenceNotStr[MessageChatCompletionRequestAssistantMessageContentArrayOfContentPart], None] + content: Optional[MessageChatCompletionRequestAssistantMessageContent] """The contents of the assistant message.""" tool_calls: Iterable[MessageChatCompletionRequestAssistantMessageToolCall] """The tool calls generated by the model, such as function calls.""" +class MessageChatCompletionRequestToolMessageContentChatCompletionRequestContentPartTextCacheControl( + TypedDict, total=False +): + """Cache control settings for the content part.""" + + type: Required[Literal["ephemeral"]] + """The cache control type.""" + + ttl: Literal["5m", "1h"] + """The cache TTL.""" + + +class 
MessageChatCompletionRequestToolMessageContentChatCompletionRequestContentPartText(TypedDict, total=False): + """Content part with type and text""" + + text: Required[str] + """The text content""" + + type: Required[Literal["text"]] + """The type of content part""" + + cache_control: MessageChatCompletionRequestToolMessageContentChatCompletionRequestContentPartTextCacheControl + """Cache control settings for the content part.""" + + +class MessageChatCompletionRequestToolMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl( + TypedDict, total=False +): + """Cache control settings for the content part.""" + + type: Required[Literal["ephemeral"]] + """The cache control type.""" + + ttl: Literal["5m", "1h"] + """The cache TTL.""" + + +class MessageChatCompletionRequestToolMessageContentArrayOfContentPartChatCompletionRequestContentPartText( + TypedDict, total=False +): + """Content part with type and text""" + + text: Required[str] + """The text content""" + + type: Required[Literal["text"]] + """The type of content part""" + + cache_control: ( + MessageChatCompletionRequestToolMessageContentArrayOfContentPartChatCompletionRequestContentPartTextCacheControl + ) + """Cache control settings for the content part.""" + + +MessageChatCompletionRequestToolMessageContentArrayOfContentPart: TypeAlias = Union[ + str, MessageChatCompletionRequestToolMessageContentArrayOfContentPartChatCompletionRequestContentPartText +] + +MessageChatCompletionRequestToolMessageContent: TypeAlias = Union[ + str, + MessageChatCompletionRequestToolMessageContentChatCompletionRequestContentPartText, + SequenceNotStr[MessageChatCompletionRequestToolMessageContentArrayOfContentPart], +] + + class MessageChatCompletionRequestToolMessage(TypedDict, total=False): - content: Required[str] + content: Required[MessageChatCompletionRequestToolMessageContent] """The contents of the tool message.""" role: Required[Literal["tool"]] diff --git a/src/gradient/types/knowledge_base_create_params.py b/src/gradient/types/knowledge_base_create_params.py index 24cfd98b..2c7bece1 100644 --- a/src/gradient/types/knowledge_base_create_params.py +++ b/src/gradient/types/knowledge_base_create_params.py @@ -3,7 +3,7 @@ from __future__ import annotations from typing import Iterable -from typing_extensions import TypedDict +from typing_extensions import Literal, TypedDict from .._types import SequenceNotStr from .knowledge_bases.aws_data_source_param import AwsDataSourceParam @@ -11,7 +11,13 @@ from .knowledge_bases.api_file_upload_data_source_param import APIFileUploadDataSourceParam from .knowledge_bases.api_web_crawler_data_source_param import APIWebCrawlerDataSourceParam -__all__ = ["KnowledgeBaseCreateParams", "Datasource", "DatasourceDropboxDataSource", "DatasourceGoogleDriveDataSource"] +__all__ = [ + "KnowledgeBaseCreateParams", + "Datasource", + "DatasourceChunkingOptions", + "DatasourceDropboxDataSource", + "DatasourceGoogleDriveDataSource", +] class KnowledgeBaseCreateParams(TypedDict, total=False): @@ -52,6 +58,25 @@ class KnowledgeBaseCreateParams(TypedDict, total=False): """The VPC to deploy the knowledge base database in""" +class DatasourceChunkingOptions(TypedDict, total=False): + """Configuration options for the chunking algorithm. 
+ + **Note: This feature requires enabling the knowledgebase enhancements feature preview flag.** + """ + + child_chunk_size: int + """Hierarchical options""" + + max_chunk_size: int + """Section_Based and Fixed_Length options""" + + parent_chunk_size: int + """Hierarchical options""" + + semantic_threshold: float + """Semantic options""" + + class DatasourceDropboxDataSource(TypedDict, total=False): """Dropbox Data Source""" @@ -88,6 +113,26 @@ class Datasource(TypedDict, total=False): bucket_region: str """Deprecated, moved to data_source_details""" + chunking_algorithm: Literal[ + "CHUNKING_ALGORITHM_UNKNOWN", + "CHUNKING_ALGORITHM_SECTION_BASED", + "CHUNKING_ALGORITHM_HIERARCHICAL", + "CHUNKING_ALGORITHM_SEMANTIC", + "CHUNKING_ALGORITHM_FIXED_LENGTH", + ] + """The chunking algorithm to use for processing data sources. + + **Note: This feature requires enabling the knowledgebase enhancements feature + preview flag.** + """ + + chunking_options: DatasourceChunkingOptions + """Configuration options for the chunking algorithm. + + **Note: This feature requires enabling the knowledgebase enhancements feature + preview flag.** + """ + dropbox_data_source: DatasourceDropboxDataSource """Dropbox Data Source""" diff --git a/src/gradient/types/knowledge_bases/__init__.py b/src/gradient/types/knowledge_bases/__init__.py index a8ce2cc7..e7b88be8 100644 --- a/src/gradient/types/knowledge_bases/__init__.py +++ b/src/gradient/types/knowledge_bases/__init__.py @@ -10,12 +10,14 @@ from .indexing_job_list_params import IndexingJobListParams as IndexingJobListParams from .data_source_create_params import DataSourceCreateParams as DataSourceCreateParams from .data_source_list_response import DataSourceListResponse as DataSourceListResponse +from .data_source_update_params import DataSourceUpdateParams as DataSourceUpdateParams from .indexing_job_create_params import IndexingJobCreateParams as IndexingJobCreateParams from .indexing_job_list_response import IndexingJobListResponse as IndexingJobListResponse from .api_file_upload_data_source import APIFileUploadDataSource as APIFileUploadDataSource from .api_web_crawler_data_source import APIWebCrawlerDataSource as APIWebCrawlerDataSource from .data_source_create_response import DataSourceCreateResponse as DataSourceCreateResponse from .data_source_delete_response import DataSourceDeleteResponse as DataSourceDeleteResponse +from .data_source_update_response import DataSourceUpdateResponse as DataSourceUpdateResponse from .api_spaces_data_source_param import APISpacesDataSourceParam as APISpacesDataSourceParam from .indexing_job_create_response import IndexingJobCreateResponse as IndexingJobCreateResponse from .api_knowledge_base_data_source import APIKnowledgeBaseDataSource as APIKnowledgeBaseDataSource diff --git a/src/gradient/types/knowledge_bases/api_knowledge_base_data_source.py b/src/gradient/types/knowledge_bases/api_knowledge_base_data_source.py index b73e325e..ef5db1a5 100644 --- a/src/gradient/types/knowledge_bases/api_knowledge_base_data_source.py +++ b/src/gradient/types/knowledge_bases/api_knowledge_base_data_source.py @@ -2,6 +2,7 @@ from typing import Optional from datetime import datetime +from typing_extensions import Literal from ..._models import BaseModel from .api_spaces_data_source import APISpacesDataSource @@ -9,7 +10,13 @@ from .api_file_upload_data_source import APIFileUploadDataSource from .api_web_crawler_data_source import APIWebCrawlerDataSource -__all__ = ["APIKnowledgeBaseDataSource", "AwsDataSource", "DropboxDataSource", 
"GoogleDriveDataSource"] +__all__ = [ + "APIKnowledgeBaseDataSource", + "AwsDataSource", + "ChunkingOptions", + "DropboxDataSource", + "GoogleDriveDataSource", +] class AwsDataSource(BaseModel): @@ -24,6 +31,25 @@ class AwsDataSource(BaseModel): """Region of bucket""" +class ChunkingOptions(BaseModel): + """Configuration options for the chunking algorithm. + + **Note: This feature requires enabling the knowledgebase enhancements feature preview flag.** + """ + + child_chunk_size: Optional[int] = None + """Hierarchical options""" + + max_chunk_size: Optional[int] = None + """Section_Based and Fixed_Length options""" + + parent_chunk_size: Optional[int] = None + """Hierarchical options""" + + semantic_threshold: Optional[float] = None + """Semantic options""" + + class DropboxDataSource(BaseModel): """Dropbox Data Source for Display""" @@ -48,6 +74,28 @@ class APIKnowledgeBaseDataSource(BaseModel): bucket_name: Optional[str] = None """Name of storage bucket - Deprecated, moved to data_source_details""" + chunking_algorithm: Optional[ + Literal[ + "CHUNKING_ALGORITHM_UNKNOWN", + "CHUNKING_ALGORITHM_SECTION_BASED", + "CHUNKING_ALGORITHM_HIERARCHICAL", + "CHUNKING_ALGORITHM_SEMANTIC", + "CHUNKING_ALGORITHM_FIXED_LENGTH", + ] + ] = None + """The chunking algorithm to use for processing data sources. + + **Note: This feature requires enabling the knowledgebase enhancements feature + preview flag.** + """ + + chunking_options: Optional[ChunkingOptions] = None + """Configuration options for the chunking algorithm. + + **Note: This feature requires enabling the knowledgebase enhancements feature + preview flag.** + """ + created_at: Optional[datetime] = None """Creation date / time""" diff --git a/src/gradient/types/knowledge_bases/data_source_create_params.py b/src/gradient/types/knowledge_bases/data_source_create_params.py index ac3aa93c..bc65e42a 100644 --- a/src/gradient/types/knowledge_bases/data_source_create_params.py +++ b/src/gradient/types/knowledge_bases/data_source_create_params.py @@ -2,20 +2,40 @@ from __future__ import annotations -from typing_extensions import Annotated, TypedDict +from typing_extensions import Literal, Annotated, TypedDict from ..._utils import PropertyInfo from .aws_data_source_param import AwsDataSourceParam from .api_spaces_data_source_param import APISpacesDataSourceParam from .api_web_crawler_data_source_param import APIWebCrawlerDataSourceParam -__all__ = ["DataSourceCreateParams"] +__all__ = ["DataSourceCreateParams", "ChunkingOptions"] class DataSourceCreateParams(TypedDict, total=False): aws_data_source: AwsDataSourceParam """AWS S3 Data Source""" + chunking_algorithm: Literal[ + "CHUNKING_ALGORITHM_UNKNOWN", + "CHUNKING_ALGORITHM_SECTION_BASED", + "CHUNKING_ALGORITHM_HIERARCHICAL", + "CHUNKING_ALGORITHM_SEMANTIC", + "CHUNKING_ALGORITHM_FIXED_LENGTH", + ] + """The chunking algorithm to use for processing data sources. + + **Note: This feature requires enabling the knowledgebase enhancements feature + preview flag.** + """ + + chunking_options: ChunkingOptions + """Configuration options for the chunking algorithm. 
+ + **Note: This feature requires enabling the knowledgebase enhancements feature + preview flag.** + """ + body_knowledge_base_uuid: Annotated[str, PropertyInfo(alias="knowledge_base_uuid")] """Knowledge base id""" @@ -24,3 +44,22 @@ class DataSourceCreateParams(TypedDict, total=False): web_crawler_data_source: APIWebCrawlerDataSourceParam """WebCrawlerDataSource""" + + +class ChunkingOptions(TypedDict, total=False): + """Configuration options for the chunking algorithm. + + **Note: This feature requires enabling the knowledgebase enhancements feature preview flag.** + """ + + child_chunk_size: int + """Hierarchical options""" + + max_chunk_size: int + """Section_Based and Fixed_Length options""" + + parent_chunk_size: int + """Hierarchical options""" + + semantic_threshold: float + """Semantic options""" diff --git a/src/gradient/types/knowledge_bases/data_source_update_params.py b/src/gradient/types/knowledge_bases/data_source_update_params.py new file mode 100644 index 00000000..ffcdf95b --- /dev/null +++ b/src/gradient/types/knowledge_bases/data_source_update_params.py @@ -0,0 +1,58 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, Annotated, TypedDict + +from ..._utils import PropertyInfo + +__all__ = ["DataSourceUpdateParams", "ChunkingOptions"] + + +class DataSourceUpdateParams(TypedDict, total=False): + path_knowledge_base_uuid: Required[Annotated[str, PropertyInfo(alias="knowledge_base_uuid")]] + + chunking_algorithm: Literal[ + "CHUNKING_ALGORITHM_UNKNOWN", + "CHUNKING_ALGORITHM_SECTION_BASED", + "CHUNKING_ALGORITHM_HIERARCHICAL", + "CHUNKING_ALGORITHM_SEMANTIC", + "CHUNKING_ALGORITHM_FIXED_LENGTH", + ] + """The chunking algorithm to use for processing data sources. + + **Note: This feature requires enabling the knowledgebase enhancements feature + preview flag.** + """ + + chunking_options: ChunkingOptions + """Configuration options for the chunking algorithm. + + **Note: This feature requires enabling the knowledgebase enhancements feature + preview flag.** + """ + + body_data_source_uuid: Annotated[str, PropertyInfo(alias="data_source_uuid")] + """Data Source ID (Path Parameter)""" + + body_knowledge_base_uuid: Annotated[str, PropertyInfo(alias="knowledge_base_uuid")] + """Knowledge Base ID (Path Parameter)""" + + +class ChunkingOptions(TypedDict, total=False): + """Configuration options for the chunking algorithm. + + **Note: This feature requires enabling the knowledgebase enhancements feature preview flag.** + """ + + child_chunk_size: int + """Hierarchical options""" + + max_chunk_size: int + """Section_Based and Fixed_Length options""" + + parent_chunk_size: int + """Hierarchical options""" + + semantic_threshold: float + """Semantic options""" diff --git a/src/gradient/types/knowledge_bases/data_source_update_response.py b/src/gradient/types/knowledge_bases/data_source_update_response.py new file mode 100644 index 00000000..31484137 --- /dev/null +++ b/src/gradient/types/knowledge_bases/data_source_update_response.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
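Taken together, `DataSourceUpdateParams` above and the `DataSourceUpdateResponse` model that follows back the new data-source update method. As a quick orientation, here is a minimal usage sketch of re-chunking an existing data source; it assumes credentials are picked up from the environment, and the UUIDs and chunk size are illustrative placeholders borrowed from the generated tests:

```python
from gradient import Gradient

client = Gradient()  # assumes API credentials are configured via the environment

# Switch an existing data source to section-based chunking.
# Note: per the field docstrings, the chunking fields require the
# knowledgebase enhancements feature preview flag to be enabled.
updated = client.knowledge_bases.data_sources.update(
    path_data_source_uuid="123e4567-e89b-12d3-a456-426614174000",  # placeholder
    path_knowledge_base_uuid="123e4567-e89b-12d3-a456-426614174000",  # placeholder
    chunking_algorithm="CHUNKING_ALGORITHM_SECTION_BASED",
    chunking_options={"max_chunk_size": 750},  # illustrative size
)
print(updated.knowledge_base_data_source)
```

Per the field docstrings, only `max_chunk_size` applies to the section-based and fixed-length algorithms, while `child_chunk_size`/`parent_chunk_size` are hierarchical options and `semantic_threshold` is a semantic option.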
+ +from typing import Optional + +from ..._models import BaseModel +from .api_knowledge_base_data_source import APIKnowledgeBaseDataSource + +__all__ = ["DataSourceUpdateResponse"] + + +class DataSourceUpdateResponse(BaseModel): + """ + Update a data source of a knowledge base with change in chunking algorithm/options + """ + + knowledge_base_data_source: Optional[APIKnowledgeBaseDataSource] = None + """Data Source configuration for Knowledge Bases""" diff --git a/src/gradient/types/response_create_params.py b/src/gradient/types/response_create_params.py new file mode 100644 index 00000000..9b870f3c --- /dev/null +++ b/src/gradient/types/response_create_params.py @@ -0,0 +1,331 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, List, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from .._types import SequenceNotStr + +__all__ = [ + "ResponseCreateParamsBase", + "InputUnionMember1", + "InputUnionMember1UnionMember0", + "InputUnionMember1UnionMember0Content", + "InputUnionMember1UnionMember1", + "InputUnionMember1UnionMember1ContentUnionMember1", + "InputUnionMember1UnionMember1ContentUnionMember1UnionMember0", + "InputUnionMember1UnionMember1ToolCall", + "InputUnionMember1UnionMember1ToolCallFunction", + "StreamOptions", + "ToolChoice", + "ToolChoiceChatCompletionNamedToolChoice", + "ToolChoiceChatCompletionNamedToolChoiceFunction", + "Tool", + "ResponseCreateParamsNonStreaming", + "ResponseCreateParamsStreaming", +] + + +class ResponseCreateParamsBase(TypedDict, total=False): + input: Required[Union[str, Iterable[InputUnionMember1]]] + """The input text prompt or conversation history. + + Can be a string or an array of message objects for conversation context. + """ + + model: Required[str] + """Model ID used to generate the response. Must be a VLLM model.""" + + instructions: Optional[str] + """System-level instructions for the model. + + This sets the behavior and context for the response generation. + """ + + max_output_tokens: Optional[int] + """Maximum number of tokens to generate in the response. + + If not specified, the model will use a default value. + """ + + max_tokens: Optional[int] + """The maximum number of tokens that can be generated in the completion. + + Alias for max_output_tokens for compatibility. + """ + + metadata: Optional[Dict[str, str]] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + modalities: Optional[List[Literal["text"]]] + """Specifies the output types the model should generate. + + For text-to-text, this should be ["text"]. + """ + + parallel_tool_calls: Optional[bool] + """Whether to enable parallel tool calls. + + When true, the model can make multiple tool calls in parallel. + """ + + stop: Union[Optional[str], SequenceNotStr[str], None] + """Up to 4 sequences where the API will stop generating further tokens. + + The returned text will not contain the stop sequence. + """ + + stream_options: Optional[StreamOptions] + """Options for streaming response. Only set this when you set `stream: true`.""" + + temperature: Optional[float] + """What sampling temperature to use, between 0 and 2. 
+ + Higher values like 0.8 will make the output more random, while lower values like + 0.2 will make it more focused and deterministic. We generally recommend altering + this or `top_p` but not both. + """ + + tool_choice: ToolChoice + """ + Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + """ + + tools: Iterable[Tool] + """A list of tools the model may call. + + Currently, only functions are supported as a tool. Uses Responses API format + (with `name`, `description`, `parameters` at top level). + """ + + top_p: Optional[float] + """ + An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + """ + + user: str + """ + A unique identifier representing your end-user, which can help DigitalOcean to + monitor and detect abuse. + """ + + +class InputUnionMember1UnionMember0ContentTyped(TypedDict, total=False): + text: str + """The reasoning text content""" + + type: Literal["reasoning_text"] + """The type of content""" + + +InputUnionMember1UnionMember0Content: TypeAlias = Union[InputUnionMember1UnionMember0ContentTyped, Dict[str, object]] + + +class InputUnionMember1UnionMember0Typed(TypedDict, total=False): + type: Required[Literal["function_call", "function_call_output", "reasoning"]] + """ + The type of input item (must be function_call, function_call_output, or + reasoning) + """ + + id: str + """The unique ID of the reasoning item (optional for reasoning)""" + + arguments: str + """JSON string of function arguments (required for function_call)""" + + call_id: str + """The call ID (required for function_call and function_call_output)""" + + content: Optional[Iterable[InputUnionMember1UnionMember0Content]] + """Array of reasoning content parts (optional for reasoning, can be null)""" + + encrypted_content: Optional[str] + """Encrypted content (optional)""" + + name: str + """The function name (required for function_call)""" + + output: str + """JSON string of function output (required for function_call_output)""" + + status: Optional[str] + """Status of the item (optional, can be null)""" + + summary: Iterable[object] + """Summary of the reasoning (optional for reasoning)""" + + +InputUnionMember1UnionMember0: TypeAlias = Union[InputUnionMember1UnionMember0Typed, Dict[str, object]] + + +class InputUnionMember1UnionMember1ContentUnionMember1UnionMember0(TypedDict, total=False): + text: Required[str] + """The text content""" + + type: Required[Literal["input_text"]] + """The type of content part""" + + +InputUnionMember1UnionMember1ContentUnionMember1: TypeAlias = Union[ + InputUnionMember1UnionMember1ContentUnionMember1UnionMember0, Dict[str, object] +] + + +class InputUnionMember1UnionMember1ToolCallFunction(TypedDict, total=False): + """The function that the model called.""" + + arguments: Required[str] + """ + The arguments to call the function with, as generated by the model 
in JSON + format. Note that the model does not always generate valid JSON, and may + hallucinate parameters not defined by your function schema. Validate the + arguments in your code before calling your function. + """ + + name: Required[str] + """The name of the function to call.""" + + +class InputUnionMember1UnionMember1ToolCall(TypedDict, total=False): + id: Required[str] + """The ID of the tool call.""" + + function: Required[InputUnionMember1UnionMember1ToolCallFunction] + """The function that the model called.""" + + type: Required[Literal["function"]] + """The type of the tool. Currently, only `function` is supported.""" + + +class InputUnionMember1UnionMember1Typed(TypedDict, total=False): + content: Required[Union[str, Iterable[InputUnionMember1UnionMember1ContentUnionMember1]]] + """The content of the message (string or content parts array)""" + + role: Literal["user", "assistant", "system", "tool", "developer"] + """The role of the message author""" + + tool_call_id: str + """Tool call ID that this message is responding to (required for tool role)""" + + tool_calls: Iterable[InputUnionMember1UnionMember1ToolCall] + """Tool calls made by the assistant (for assistant role messages)""" + + type: Literal["message"] + """Optional type identifier for message items (used by some clients like Codex)""" + + +InputUnionMember1UnionMember1: TypeAlias = Union[InputUnionMember1UnionMember1Typed, Dict[str, object]] + +InputUnionMember1: TypeAlias = Union[InputUnionMember1UnionMember0, InputUnionMember1UnionMember1] + + +class StreamOptions(TypedDict, total=False): + """Options for streaming response. Only set this when you set `stream: true`.""" + + include_usage: bool + """If set, an additional chunk will be streamed before the `data: [DONE]` message. + + The `usage` field on this chunk shows the token usage statistics for the entire + request, and the `choices` field will always be an empty array. + + All other chunks will also include a `usage` field, but with a null value. + **NOTE:** If the stream is interrupted, you may not receive the final usage + chunk which contains the total token usage for the request. + """ + + +class ToolChoiceChatCompletionNamedToolChoiceFunction(TypedDict, total=False): + name: Required[str] + """The name of the function to call.""" + + +class ToolChoiceChatCompletionNamedToolChoice(TypedDict, total=False): + """Specifies a tool the model should use. + + Use to force the model to call a specific function. + """ + + function: Required[ToolChoiceChatCompletionNamedToolChoiceFunction] + + type: Required[Literal["function"]] + """The type of the tool. Currently, only `function` is supported.""" + + +ToolChoice: TypeAlias = Union[Literal["none", "auto", "required"], ToolChoiceChatCompletionNamedToolChoice] + + +class Tool(TypedDict, total=False): + """Tool definition for Responses API (flat format). + + This format is used by VLLM's Responses API where name, description, and parameters are at the top level of the tool object. + """ + + type: Required[Literal["function", "web_search", "web_search_2025_08_26"]] + """The type of the tool. + + Supported values are `function` (custom tools), `web_search`, and + `web_search_2025_08_26` (built-in web search). + """ + + description: str + """ + A description of what the function does, used by the model to choose when and + how to call the function. + """ + + name: str + """The name of the function to be called. + + Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length + of 64. 
+ """ + + parameters: Dict[str, object] + """The parameters the functions accepts, described as a JSON Schema object. + + See the [guide](/docs/guides/function-calling) for examples, and the + [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for + documentation about the format. + + Omitting `parameters` defines a function with an empty parameter list. + """ + + +class ResponseCreateParamsNonStreaming(ResponseCreateParamsBase, total=False): + stream: Optional[Literal[False]] + """ + If set to true, the model response data will be streamed to the client as it is + generated using server-sent events. + """ + + +class ResponseCreateParamsStreaming(ResponseCreateParamsBase): + stream: Required[Literal[True]] + """ + If set to true, the model response data will be streamed to the client as it is + generated using server-sent events. + """ + + +ResponseCreateParams = Union[ResponseCreateParamsNonStreaming, ResponseCreateParamsStreaming] diff --git a/src/gradient/types/retrieve_documents_params.py b/src/gradient/types/retrieve_documents_params.py index bad99ad3..359d8a07 100644 --- a/src/gradient/types/retrieve_documents_params.py +++ b/src/gradient/types/retrieve_documents_params.py @@ -73,4 +73,3 @@ class Filters(TypedDict, total=False): should: Iterable[FiltersShould] """At least one condition must match (OR)""" - diff --git a/src/gradient/types/shared/__init__.py b/src/gradient/types/shared/__init__.py index 4fb2986a..272092b7 100644 --- a/src/gradient/types/shared/__init__.py +++ b/src/gradient/types/shared/__init__.py @@ -26,7 +26,9 @@ from .chat_completion_chunk import ChatCompletionChunk as ChatCompletionChunk from .image_gen_stream_event import ImageGenStreamEvent as ImageGenStreamEvent from .subscription_tier_base import SubscriptionTierBase as SubscriptionTierBase +from .create_response_response import CreateResponseResponse as CreateResponseResponse from .image_gen_completed_event import ImageGenCompletedEvent as ImageGenCompletedEvent from .droplet_next_backup_window import DropletNextBackupWindow as DropletNextBackupWindow from .chat_completion_token_logprob import ChatCompletionTokenLogprob as ChatCompletionTokenLogprob from .image_gen_partial_image_event import ImageGenPartialImageEvent as ImageGenPartialImageEvent +from .create_response_stream_response import CreateResponseStreamResponse as CreateResponseStreamResponse diff --git a/src/gradient/types/shared/completion_usage.py b/src/gradient/types/shared/completion_usage.py index 79ce64ee..596bd045 100644 --- a/src/gradient/types/shared/completion_usage.py +++ b/src/gradient/types/shared/completion_usage.py @@ -2,12 +2,31 @@ from ..._models import BaseModel -__all__ = ["CompletionUsage"] +__all__ = ["CompletionUsage", "CacheCreation"] + + +class CacheCreation(BaseModel): + """Breakdown of prompt tokens written to cache.""" + + ephemeral_1h_input_tokens: int + """Number of prompt tokens written to 1h cache.""" + + ephemeral_5m_input_tokens: int + """Number of prompt tokens written to 5m cache.""" class CompletionUsage(BaseModel): """Usage statistics for the completion request.""" + cache_created_input_tokens: int + """Number of prompt tokens written to cache.""" + + cache_creation: CacheCreation + """Breakdown of prompt tokens written to cache.""" + + cache_read_input_tokens: int + """Number of prompt tokens read from cache.""" + completion_tokens: int """Number of tokens in the generated completion.""" diff --git a/src/gradient/types/shared/create_response_response.py 
b/src/gradient/types/shared/create_response_response.py new file mode 100644 index 00000000..61dbb155 --- /dev/null +++ b/src/gradient/types/shared/create_response_response.py @@ -0,0 +1,332 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +import builtins +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, TypeAlias + +from ..._models import BaseModel +from .chat_completion_token_logprob import ChatCompletionTokenLogprob + +__all__ = [ + "CreateResponseResponse", + "Usage", + "UsageInputTokensDetails", + "UsageOutputTokensDetails", + "Choice", + "ChoiceMessage", + "ChoiceMessageToolCall", + "ChoiceMessageToolCallFunction", + "ChoiceLogprobs", + "Output", + "OutputUnionMember0", + "OutputUnionMember1", + "OutputUnionMember2", + "OutputUnionMember2Content", + "Tool", +] + + +class UsageInputTokensDetails(BaseModel): + """A detailed breakdown of the input tokens.""" + + cached_tokens: int + """The number of tokens that were retrieved from the cache. + + [More on prompt caching](https://platform.openai.com/docs/guides/prompt-caching). + """ + + +class UsageOutputTokensDetails(BaseModel): + """A detailed breakdown of the output tokens.""" + + reasoning_tokens: int + """The number of reasoning tokens.""" + + tool_output_tokens: int + """The number of tool output tokens.""" + + +class Usage(BaseModel): + """ + Detailed token usage statistics for the request, including input/output token counts and detailed breakdowns. + """ + + input_tokens: int + """The number of input tokens.""" + + input_tokens_details: UsageInputTokensDetails + """A detailed breakdown of the input tokens.""" + + output_tokens: int + """The number of output tokens.""" + + output_tokens_details: UsageOutputTokensDetails + """A detailed breakdown of the output tokens.""" + + total_tokens: int + """The total number of tokens used.""" + + +class ChoiceMessageToolCallFunction(BaseModel): + """The function that the model called.""" + + arguments: str + """ + The arguments to call the function with, as generated by the model in JSON + format. Note that the model does not always generate valid JSON, and may + hallucinate parameters not defined by your function schema. Validate the + arguments in your code before calling your function. + """ + + name: str + """The name of the function to call.""" + + +class ChoiceMessageToolCall(BaseModel): + id: str + """The ID of the tool call.""" + + function: ChoiceMessageToolCallFunction + """The function that the model called.""" + + type: Literal["function"] + """The type of the tool. Currently, only `function` is supported.""" + + +class ChoiceMessage(BaseModel): + """The generated message response.""" + + content: Optional[str] = None + """The generated text content.""" + + role: Optional[Literal["assistant"]] = None + """The role of the message author, which is always `assistant`.""" + + tool_calls: Optional[List[ChoiceMessageToolCall]] = None + """The tool calls generated by the model, such as function calls.""" + + +class ChoiceLogprobs(BaseModel): + """Log probability information for the choice. + + Only present if logprobs was requested in the request. + """ + + content: Optional[List[ChatCompletionTokenLogprob]] = None + """A list of message content tokens with log probability information.""" + + +class Choice(BaseModel): + finish_reason: Literal["stop", "length", "tool_calls", "content_filter"] + """The reason the model stopped generating tokens. 
+ + This will be `stop` if the model hit a natural stop point or a provided stop + sequence, `length` if the maximum number of tokens specified in the request was + reached, or `tool_calls` if the model called a tool. + """ + + index: int + """The index of the choice in the list of choices.""" + + message: ChoiceMessage + """The generated message response.""" + + logprobs: Optional[ChoiceLogprobs] = None + """Log probability information for the choice. + + Only present if logprobs was requested in the request. + """ + + +class OutputUnionMember0(BaseModel): + arguments: str + """JSON string of function arguments""" + + call_id: str + """The unique ID of the function tool call""" + + name: str + """The name of the function to call""" + + type: Literal["function_call"] + """The type of output item""" + + id: Optional[str] = None + """The unique ID of the function tool call (same as call_id)""" + + encrypted_content: Optional[str] = None + """Encrypted content (optional)""" + + status: Optional[str] = None + """Status of the item (optional, can be null)""" + + +class OutputUnionMember1(BaseModel): + text: str + """The text content""" + + type: Literal["text"] + """The type of output item""" + + +class OutputUnionMember2Content(BaseModel): + text: str + """The reasoning text content""" + + type: Literal["reasoning_text"] + """The type of content""" + + +class OutputUnionMember2(BaseModel): + id: str + """The unique ID of the reasoning item""" + + content: List[OutputUnionMember2Content] + """Array of reasoning content parts""" + + summary: List[object] + """Summary of the reasoning (usually empty)""" + + type: Literal["reasoning"] + """The type of output item""" + + encrypted_content: Optional[str] = None + """Encrypted content (optional)""" + + status: Optional[str] = None + """Status of the item (optional, can be null)""" + + +Output: TypeAlias = Union[OutputUnionMember0, OutputUnionMember1, OutputUnionMember2] + + +class Tool(BaseModel): + """Tool definition for Responses API (flat format). + + This format is used by VLLM's Responses API where name, description, and parameters are at the top level of the tool object. + """ + + type: Literal["function", "web_search", "web_search_2025_08_26"] + """The type of the tool. + + Supported values are `function` (custom tools), `web_search`, and + `web_search_2025_08_26` (built-in web search). + """ + + description: Optional[str] = None + """ + A description of what the function does, used by the model to choose when and + how to call the function. + """ + + name: Optional[str] = None + """The name of the function to be called. + + Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length + of 64. + """ + + parameters: Optional[Dict[str, object]] = None + """The parameters the functions accepts, described as a JSON Schema object. + + See the [guide](/docs/guides/function-calling) for examples, and the + [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for + documentation about the format. + + Omitting `parameters` defines a function with an empty parameter list. + """ + + +class CreateResponseResponse(BaseModel): + """ + Represents a text-to-text response returned by the model, based on the provided input. VLLM models only. 
+ """ + + id: str + """A unique identifier for the response.""" + + created: int + """The Unix timestamp (in seconds) of when the response was created.""" + + model: str + """The model used to generate the response.""" + + object: Literal["response"] + """The object type, which is always `response`.""" + + usage: Usage + """ + Detailed token usage statistics for the request, including input/output token + counts and detailed breakdowns. + """ + + background: Optional[bool] = None + """Whether the request was processed in the background""" + + choices: Optional[List[Choice]] = None + """A list of response choices. + + Can be more than one if `n` is greater than 1. Optional - Responses API + primarily uses the output array. + """ + + input_messages: Optional[List[builtins.object]] = None + """Input messages (if applicable)""" + + max_output_tokens: Optional[int] = None + """Maximum output tokens setting""" + + max_tool_calls: Optional[int] = None + """Maximum tool calls setting""" + + output: Optional[List[Output]] = None + """An array of content items generated by the model. + + This includes text content, function calls, reasoning items, and other output + types. Use this field for Responses API compatibility. + """ + + output_messages: Optional[List[builtins.object]] = None + """Output messages (if applicable)""" + + parallel_tool_calls: Optional[bool] = None + """Whether parallel tool calls are enabled""" + + previous_response_id: Optional[str] = None + """Previous response ID (for multi-turn conversations)""" + + prompt: Optional[str] = None + """Prompt used for the response""" + + reasoning: Optional[str] = None + """Reasoning content""" + + service_tier: Optional[str] = None + """Service tier used""" + + status: Optional[str] = None + """Status of the response""" + + temperature: Optional[float] = None + """Temperature setting used for the response""" + + text: Optional[str] = None + """Text content""" + + tool_choice: Optional[str] = None + """Tool choice setting used for the response""" + + tools: Optional[List[Tool]] = None + """Tools available for the response""" + + top_logprobs: Optional[int] = None + """Top logprobs setting""" + + top_p: Optional[float] = None + """Top-p setting used for the response""" + + truncation: Optional[str] = None + """Truncation setting""" + + user: Optional[str] = None + """User identifier""" diff --git a/src/gradient/types/shared/create_response_stream_response.py b/src/gradient/types/shared/create_response_stream_response.py new file mode 100644 index 00000000..ef0230c8 --- /dev/null +++ b/src/gradient/types/shared/create_response_stream_response.py @@ -0,0 +1,139 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .completion_usage import CompletionUsage +from .chat_completion_token_logprob import ChatCompletionTokenLogprob + +__all__ = [ + "CreateResponseStreamResponse", + "Choice", + "ChoiceDelta", + "ChoiceDeltaToolCall", + "ChoiceDeltaToolCallFunction", + "ChoiceLogprobs", +] + + +class ChoiceDeltaToolCallFunction(BaseModel): + """The function that the model called.""" + + arguments: str + """ + The arguments to call the function with, as generated by the model in JSON + format. Note that the model does not always generate valid JSON, and may + hallucinate parameters not defined by your function schema. Validate the + arguments in your code before calling your function. 
+ """ + + name: str + """The name of the function to call.""" + + +class ChoiceDeltaToolCall(BaseModel): + id: str + """The ID of the tool call.""" + + function: ChoiceDeltaToolCallFunction + """The function that the model called.""" + + type: Literal["function"] + """The type of the tool. Currently, only `function` is supported.""" + + +class ChoiceDelta(BaseModel): + """A chunk of the response message generated by the model.""" + + content: Optional[str] = None + """The contents of the chunk message. + + Can be null for chunks with tool calls or other non-text content. + """ + + reasoning_content: Optional[str] = None + """The reasoning content generated by the model. + + Only present when the model generates reasoning text. + """ + + role: Optional[Literal["assistant"]] = None + """The role of the message author. Only present in the first chunk.""" + + tool_calls: Optional[List[ChoiceDeltaToolCall]] = None + """The tool calls generated by the model, such as function calls. + + Only present when the model decides to call a tool. + """ + + +class ChoiceLogprobs(BaseModel): + """Log probability information for the choice. + + Only present if logprobs was requested in the request. + """ + + content: Optional[List[ChatCompletionTokenLogprob]] = None + """A list of message content tokens with log probability information.""" + + +class Choice(BaseModel): + delta: ChoiceDelta + """A chunk of the response message generated by the model.""" + + index: int + """The index of the choice in the list of choices.""" + + finish_reason: Optional[Literal["stop", "length", "tool_calls", "content_filter"]] = None + """The reason the model stopped generating tokens. + + This will be `stop` if the model hit a natural stop point or a provided stop + sequence, `length` if the maximum number of tokens specified in the request was + reached, or `tool_calls` if the model called a tool. Only present in the final + chunk. + """ + + logprobs: Optional[ChoiceLogprobs] = None + """Log probability information for the choice. + + Only present if logprobs was requested in the request. + """ + + +class CreateResponseStreamResponse(BaseModel): + """ + Represents a streamed chunk of a text-to-text response returned by the model, based on the provided input. VLLM models only. + """ + + id: str + """A unique identifier for the response. Each chunk has the same ID.""" + + choices: List[Choice] + """A list of response choice chunks. + + Can contain more than one element if `n` is greater than 1. Can also be empty + for the last chunk if you set `stream_options: {"include_usage": true}`. + """ + + created: int + """The Unix timestamp (in seconds) of when the response was created. + + Each chunk has the same timestamp. + """ + + model: str + """The model used to generate the response.""" + + object: Literal["response.chunk"] + """The object type, which is always `response.chunk`.""" + + usage: Optional[CompletionUsage] = None + """ + An optional field that will only be present when you set + `stream_options: {"include_usage": true}` in your request. When present, it + contains a null value **except for the last chunk** which contains the token + usage statistics for the entire request. **NOTE:** If the stream is interrupted + or cancelled, you may not receive the final usage chunk which contains the total + token usage for the request. 
+ """ diff --git a/tests/api_resources/agents/test_evaluation_datasets.py b/tests/api_resources/agents/test_evaluation_datasets.py index 64dceb03..5093660e 100644 --- a/tests/api_resources/agents/test_evaluation_datasets.py +++ b/tests/api_resources/agents/test_evaluation_datasets.py @@ -30,6 +30,7 @@ def test_method_create(self, client: Gradient) -> None: @parametrize def test_method_create_with_all_params(self, client: Gradient) -> None: evaluation_dataset = client.agents.evaluation_datasets.create( + dataset_type="EVALUATION_DATASET_TYPE_UNKNOWN", file_upload_dataset={ "original_file_name": "example name", "size_in_bytes": "12345", @@ -126,6 +127,7 @@ async def test_method_create(self, async_client: AsyncGradient) -> None: @parametrize async def test_method_create_with_all_params(self, async_client: AsyncGradient) -> None: evaluation_dataset = await async_client.agents.evaluation_datasets.create( + dataset_type="EVALUATION_DATASET_TYPE_UNKNOWN", file_upload_dataset={ "original_file_name": "example name", "size_in_bytes": "12345", diff --git a/tests/api_resources/agents/test_evaluation_runs.py b/tests/api_resources/agents/test_evaluation_runs.py index c6acaf82..8fdfe2cd 100644 --- a/tests/api_resources/agents/test_evaluation_runs.py +++ b/tests/api_resources/agents/test_evaluation_runs.py @@ -32,6 +32,7 @@ def test_method_create(self, client: Gradient) -> None: @parametrize def test_method_create_with_all_params(self, client: Gradient) -> None: evaluation_run = client.agents.evaluation_runs.create( + agent_deployment_names=["example string"], agent_uuids=["example string"], run_name="Evaluation Run Name", test_case_uuid='"12345678-1234-1234-1234-123456789012"', @@ -216,6 +217,7 @@ async def test_method_create(self, async_client: AsyncGradient) -> None: @parametrize async def test_method_create_with_all_params(self, async_client: AsyncGradient) -> None: evaluation_run = await async_client.agents.evaluation_runs.create( + agent_deployment_names=["example string"], agent_uuids=["example string"], run_name="Evaluation Run Name", test_case_uuid='"12345678-1234-1234-1234-123456789012"', diff --git a/tests/api_resources/agents/test_evaluation_test_cases.py b/tests/api_resources/agents/test_evaluation_test_cases.py index 7cd0a07e..a8942239 100644 --- a/tests/api_resources/agents/test_evaluation_test_cases.py +++ b/tests/api_resources/agents/test_evaluation_test_cases.py @@ -33,6 +33,7 @@ def test_method_create(self, client: Gradient) -> None: @parametrize def test_method_create_with_all_params(self, client: Gradient) -> None: evaluation_test_case = client.agents.evaluation_test_cases.create( + agent_workspace_name="example name", dataset_uuid="123e4567-e89b-12d3-a456-426614174000", description="example string", metrics=["example string"], @@ -278,6 +279,7 @@ async def test_method_create(self, async_client: AsyncGradient) -> None: @parametrize async def test_method_create_with_all_params(self, async_client: AsyncGradient) -> None: evaluation_test_case = await async_client.agents.evaluation_test_cases.create( + agent_workspace_name="example name", dataset_uuid="123e4567-e89b-12d3-a456-426614174000", description="example string", metrics=["example string"], diff --git a/tests/api_resources/apps/__init__.py b/tests/api_resources/apps/__init__.py new file mode 100644 index 00000000..fd8019a9 --- /dev/null +++ b/tests/api_resources/apps/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
diff --git a/tests/api_resources/apps/test_job_invocations.py b/tests/api_resources/apps/test_job_invocations.py new file mode 100644 index 00000000..a7cb68df --- /dev/null +++ b/tests/api_resources/apps/test_job_invocations.py @@ -0,0 +1,148 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from gradient import Gradient, AsyncGradient +from tests.utils import assert_matches_type +from gradient.types.apps import JobInvocationCancelResponse + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestJobInvocations: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + def test_method_cancel(self, client: Gradient) -> None: + job_invocation = client.apps.job_invocations.cancel( + job_invocation_id="123e4567-e89b-12d3-a456-426", + app_id="4f6c71e2-1e90-4762-9fee-6cc4a0a9f2cf", + ) + assert_matches_type(JobInvocationCancelResponse, job_invocation, path=["response"]) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + def test_method_cancel_with_all_params(self, client: Gradient) -> None: + job_invocation = client.apps.job_invocations.cancel( + job_invocation_id="123e4567-e89b-12d3-a456-426", + app_id="4f6c71e2-1e90-4762-9fee-6cc4a0a9f2cf", + job_name="job_name", + ) + assert_matches_type(JobInvocationCancelResponse, job_invocation, path=["response"]) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + def test_raw_response_cancel(self, client: Gradient) -> None: + response = client.apps.job_invocations.with_raw_response.cancel( + job_invocation_id="123e4567-e89b-12d3-a456-426", + app_id="4f6c71e2-1e90-4762-9fee-6cc4a0a9f2cf", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + job_invocation = response.parse() + assert_matches_type(JobInvocationCancelResponse, job_invocation, path=["response"]) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + def test_streaming_response_cancel(self, client: Gradient) -> None: + with client.apps.job_invocations.with_streaming_response.cancel( + job_invocation_id="123e4567-e89b-12d3-a456-426", + app_id="4f6c71e2-1e90-4762-9fee-6cc4a0a9f2cf", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + job_invocation = response.parse() + assert_matches_type(JobInvocationCancelResponse, job_invocation, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + def test_path_params_cancel(self, client: Gradient) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `app_id` but received ''"): + client.apps.job_invocations.with_raw_response.cancel( + job_invocation_id="123e4567-e89b-12d3-a456-426", + app_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_invocation_id` but received ''"): + client.apps.job_invocations.with_raw_response.cancel( + job_invocation_id="", + app_id="4f6c71e2-1e90-4762-9fee-6cc4a0a9f2cf", + ) + + +class TestAsyncJobInvocations: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + 
@pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + async def test_method_cancel(self, async_client: AsyncGradient) -> None: + job_invocation = await async_client.apps.job_invocations.cancel( + job_invocation_id="123e4567-e89b-12d3-a456-426", + app_id="4f6c71e2-1e90-4762-9fee-6cc4a0a9f2cf", + ) + assert_matches_type(JobInvocationCancelResponse, job_invocation, path=["response"]) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + async def test_method_cancel_with_all_params(self, async_client: AsyncGradient) -> None: + job_invocation = await async_client.apps.job_invocations.cancel( + job_invocation_id="123e4567-e89b-12d3-a456-426", + app_id="4f6c71e2-1e90-4762-9fee-6cc4a0a9f2cf", + job_name="job_name", + ) + assert_matches_type(JobInvocationCancelResponse, job_invocation, path=["response"]) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + async def test_raw_response_cancel(self, async_client: AsyncGradient) -> None: + response = await async_client.apps.job_invocations.with_raw_response.cancel( + job_invocation_id="123e4567-e89b-12d3-a456-426", + app_id="4f6c71e2-1e90-4762-9fee-6cc4a0a9f2cf", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + job_invocation = await response.parse() + assert_matches_type(JobInvocationCancelResponse, job_invocation, path=["response"]) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + async def test_streaming_response_cancel(self, async_client: AsyncGradient) -> None: + async with async_client.apps.job_invocations.with_streaming_response.cancel( + job_invocation_id="123e4567-e89b-12d3-a456-426", + app_id="4f6c71e2-1e90-4762-9fee-6cc4a0a9f2cf", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + job_invocation = await response.parse() + assert_matches_type(JobInvocationCancelResponse, job_invocation, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + async def test_path_params_cancel(self, async_client: AsyncGradient) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `app_id` but received ''"): + await async_client.apps.job_invocations.with_raw_response.cancel( + job_invocation_id="123e4567-e89b-12d3-a456-426", + app_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_invocation_id` but received ''"): + await async_client.apps.job_invocations.with_raw_response.cancel( + job_invocation_id="", + app_id="4f6c71e2-1e90-4762-9fee-6cc4a0a9f2cf", + ) diff --git a/tests/api_resources/gpu_droplets/test_images.py b/tests/api_resources/gpu_droplets/test_images.py index 4c4146e2..480f94a5 100644 --- a/tests/api_resources/gpu_droplets/test_images.py +++ b/tests/api_resources/gpu_droplets/test_images.py @@ -32,9 +32,9 @@ def test_method_create(self, client: Gradient) -> None: @parametrize def test_method_create_with_all_params(self, client: Gradient) -> None: image = client.gpu_droplets.images.create( - description=" ", + description="Cloud-optimized image w/ small footprint", distribution="Ubuntu", - name="Nifty New Snapshot", + name="ubuntu-18.04-minimal", region="nyc3", tags=["base-image", "prod"], url="http://cloud-images.ubuntu.com/minimal/releases/bionic/release/ubuntu-18.04-minimal-cloudimg-amd64.img", @@ -232,9 +232,9 @@ async def test_method_create(self, async_client: 
AsyncGradient) -> None: @parametrize async def test_method_create_with_all_params(self, async_client: AsyncGradient) -> None: image = await async_client.gpu_droplets.images.create( - description=" ", + description="Cloud-optimized image w/ small footprint", distribution="Ubuntu", - name="Nifty New Snapshot", + name="ubuntu-18.04-minimal", region="nyc3", tags=["base-image", "prod"], url="http://cloud-images.ubuntu.com/minimal/releases/bionic/release/ubuntu-18.04-minimal-cloudimg-amd64.img", diff --git a/tests/api_resources/knowledge_bases/test_data_sources.py b/tests/api_resources/knowledge_bases/test_data_sources.py index ca721d93..d28fd409 100644 --- a/tests/api_resources/knowledge_bases/test_data_sources.py +++ b/tests/api_resources/knowledge_bases/test_data_sources.py @@ -13,6 +13,7 @@ DataSourceListResponse, DataSourceCreateResponse, DataSourceDeleteResponse, + DataSourceUpdateResponse, DataSourceCreatePresignedURLsResponse, ) @@ -42,6 +43,13 @@ def test_method_create_with_all_params(self, client: Gradient) -> None: "region": "example string", "secret_key": "example string", }, + chunking_algorithm="CHUNKING_ALGORITHM_SECTION_BASED", + chunking_options={ + "child_chunk_size": 350, + "max_chunk_size": 750, + "parent_chunk_size": 1000, + "semantic_threshold": 0.5, + }, body_knowledge_base_uuid='"12345678-1234-1234-1234-123456789012"', spaces_data_source={ "bucket_name": "example name", @@ -93,6 +101,78 @@ def test_path_params_create(self, client: Gradient) -> None: path_knowledge_base_uuid="", ) + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + def test_method_update(self, client: Gradient) -> None: + data_source = client.knowledge_bases.data_sources.update( + path_data_source_uuid="123e4567-e89b-12d3-a456-426614174000", + path_knowledge_base_uuid="123e4567-e89b-12d3-a456-426614174000", + ) + assert_matches_type(DataSourceUpdateResponse, data_source, path=["response"]) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + def test_method_update_with_all_params(self, client: Gradient) -> None: + data_source = client.knowledge_bases.data_sources.update( + path_data_source_uuid="123e4567-e89b-12d3-a456-426614174000", + path_knowledge_base_uuid="123e4567-e89b-12d3-a456-426614174000", + chunking_algorithm="CHUNKING_ALGORITHM_SECTION_BASED", + chunking_options={ + "child_chunk_size": 350, + "max_chunk_size": 750, + "parent_chunk_size": 1000, + "semantic_threshold": 0.5, + }, + body_data_source_uuid="98765432-1234-1234-1234-123456789012", + body_knowledge_base_uuid="12345678-1234-1234-1234-123456789012", + ) + assert_matches_type(DataSourceUpdateResponse, data_source, path=["response"]) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + def test_raw_response_update(self, client: Gradient) -> None: + response = client.knowledge_bases.data_sources.with_raw_response.update( + path_data_source_uuid="123e4567-e89b-12d3-a456-426614174000", + path_knowledge_base_uuid="123e4567-e89b-12d3-a456-426614174000", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + data_source = response.parse() + assert_matches_type(DataSourceUpdateResponse, data_source, path=["response"]) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + def test_streaming_response_update(self, client: Gradient) -> None: + with client.knowledge_bases.data_sources.with_streaming_response.update( + path_data_source_uuid="123e4567-e89b-12d3-a456-426614174000", + 
path_knowledge_base_uuid="123e4567-e89b-12d3-a456-426614174000", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + data_source = response.parse() + assert_matches_type(DataSourceUpdateResponse, data_source, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + def test_path_params_update(self, client: Gradient) -> None: + with pytest.raises( + ValueError, match=r"Expected a non-empty value for `path_knowledge_base_uuid` but received ''" + ): + client.knowledge_bases.data_sources.with_raw_response.update( + path_data_source_uuid="123e4567-e89b-12d3-a456-426614174000", + path_knowledge_base_uuid="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `path_data_source_uuid` but received ''"): + client.knowledge_bases.data_sources.with_raw_response.update( + path_data_source_uuid="", + path_knowledge_base_uuid="123e4567-e89b-12d3-a456-426614174000", + ) + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_list(self, client: Gradient) -> None: @@ -264,6 +344,13 @@ async def test_method_create_with_all_params(self, async_client: AsyncGradient) "region": "example string", "secret_key": "example string", }, + chunking_algorithm="CHUNKING_ALGORITHM_SECTION_BASED", + chunking_options={ + "child_chunk_size": 350, + "max_chunk_size": 750, + "parent_chunk_size": 1000, + "semantic_threshold": 0.5, + }, body_knowledge_base_uuid='"12345678-1234-1234-1234-123456789012"', spaces_data_source={ "bucket_name": "example name", @@ -315,6 +402,78 @@ async def test_path_params_create(self, async_client: AsyncGradient) -> None: path_knowledge_base_uuid="", ) + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + async def test_method_update(self, async_client: AsyncGradient) -> None: + data_source = await async_client.knowledge_bases.data_sources.update( + path_data_source_uuid="123e4567-e89b-12d3-a456-426614174000", + path_knowledge_base_uuid="123e4567-e89b-12d3-a456-426614174000", + ) + assert_matches_type(DataSourceUpdateResponse, data_source, path=["response"]) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + async def test_method_update_with_all_params(self, async_client: AsyncGradient) -> None: + data_source = await async_client.knowledge_bases.data_sources.update( + path_data_source_uuid="123e4567-e89b-12d3-a456-426614174000", + path_knowledge_base_uuid="123e4567-e89b-12d3-a456-426614174000", + chunking_algorithm="CHUNKING_ALGORITHM_SECTION_BASED", + chunking_options={ + "child_chunk_size": 350, + "max_chunk_size": 750, + "parent_chunk_size": 1000, + "semantic_threshold": 0.5, + }, + body_data_source_uuid="98765432-1234-1234-1234-123456789012", + body_knowledge_base_uuid="12345678-1234-1234-1234-123456789012", + ) + assert_matches_type(DataSourceUpdateResponse, data_source, path=["response"]) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + async def test_raw_response_update(self, async_client: AsyncGradient) -> None: + response = await async_client.knowledge_bases.data_sources.with_raw_response.update( + path_data_source_uuid="123e4567-e89b-12d3-a456-426614174000", + path_knowledge_base_uuid="123e4567-e89b-12d3-a456-426614174000", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + data_source = await response.parse() + 
assert_matches_type(DataSourceUpdateResponse, data_source, path=["response"]) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + async def test_streaming_response_update(self, async_client: AsyncGradient) -> None: + async with async_client.knowledge_bases.data_sources.with_streaming_response.update( + path_data_source_uuid="123e4567-e89b-12d3-a456-426614174000", + path_knowledge_base_uuid="123e4567-e89b-12d3-a456-426614174000", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + data_source = await response.parse() + assert_matches_type(DataSourceUpdateResponse, data_source, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + async def test_path_params_update(self, async_client: AsyncGradient) -> None: + with pytest.raises( + ValueError, match=r"Expected a non-empty value for `path_knowledge_base_uuid` but received ''" + ): + await async_client.knowledge_bases.data_sources.with_raw_response.update( + path_data_source_uuid="123e4567-e89b-12d3-a456-426614174000", + path_knowledge_base_uuid="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `path_data_source_uuid` but received ''"): + await async_client.knowledge_bases.data_sources.with_raw_response.update( + path_data_source_uuid="", + path_knowledge_base_uuid="123e4567-e89b-12d3-a456-426614174000", + ) + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_list(self, async_client: AsyncGradient) -> None: diff --git a/tests/api_resources/test_billing.py b/tests/api_resources/test_billing.py new file mode 100644 index 00000000..59181b15 --- /dev/null +++ b/tests/api_resources/test_billing.py @@ -0,0 +1,177 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
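These billing tests reduce to a single method, `billing.list_insights`, keyed by a date window and an account URN. A minimal sketch, assuming environment-configured credentials and using the placeholder URN from the generated tests:

```python
import datetime

from gradient import Gradient

client = Gradient()  # assumes API credentials are configured via the environment

# Fetch billing insights for January 2025; page/per_page are optional
# pagination parameters, and the account URN is an illustrative placeholder.
insights = client.billing.list_insights(
    end_date=datetime.date(2025, 1, 31),
    account_urn="do:team:12345678-1234-1234-1234-123456789012",
    start_date=datetime.date(2025, 1, 1),
    page=1,
    per_page=10,
)
```

As with the other new endpoints, the tests below verify that empty `account_urn`, `start_date`, or `end_date` values raise `ValueError` client-side rather than producing a malformed request.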
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from gradient import Gradient, AsyncGradient +from tests.utils import assert_matches_type +from gradient.types import BillingListInsightsResponse +from gradient._utils import parse_date + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestBilling: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + def test_method_list_insights(self, client: Gradient) -> None: + billing = client.billing.list_insights( + end_date=parse_date("2025-01-31"), + account_urn="do:team:12345678-1234-1234-1234-123456789012", + start_date=parse_date("2025-01-01"), + ) + assert_matches_type(BillingListInsightsResponse, billing, path=["response"]) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + def test_method_list_insights_with_all_params(self, client: Gradient) -> None: + billing = client.billing.list_insights( + end_date=parse_date("2025-01-31"), + account_urn="do:team:12345678-1234-1234-1234-123456789012", + start_date=parse_date("2025-01-01"), + page=1, + per_page=1, + ) + assert_matches_type(BillingListInsightsResponse, billing, path=["response"]) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + def test_raw_response_list_insights(self, client: Gradient) -> None: + response = client.billing.with_raw_response.list_insights( + end_date=parse_date("2025-01-31"), + account_urn="do:team:12345678-1234-1234-1234-123456789012", + start_date=parse_date("2025-01-01"), + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + billing = response.parse() + assert_matches_type(BillingListInsightsResponse, billing, path=["response"]) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + def test_streaming_response_list_insights(self, client: Gradient) -> None: + with client.billing.with_streaming_response.list_insights( + end_date=parse_date("2025-01-31"), + account_urn="do:team:12345678-1234-1234-1234-123456789012", + start_date=parse_date("2025-01-01"), + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + billing = response.parse() + assert_matches_type(BillingListInsightsResponse, billing, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + def test_path_params_list_insights(self, client: Gradient) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `account_urn` but received ''"): + client.billing.with_raw_response.list_insights( + end_date=parse_date("2025-01-31"), + account_urn="", + start_date=parse_date("2025-01-01"), + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `start_date` but received ''"): + client.billing.with_raw_response.list_insights( + end_date=parse_date("2025-01-31"), + account_urn="do:team:12345678-1234-1234-1234-123456789012", + start_date="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `end_date` but received ''"): + client.billing.with_raw_response.list_insights( + end_date="", + account_urn="do:team:12345678-1234-1234-1234-123456789012", + start_date=parse_date("2025-01-01"), + ) + + +class TestAsyncBilling: + parametrize = pytest.mark.parametrize( + 
"async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + async def test_method_list_insights(self, async_client: AsyncGradient) -> None: + billing = await async_client.billing.list_insights( + end_date=parse_date("2025-01-31"), + account_urn="do:team:12345678-1234-1234-1234-123456789012", + start_date=parse_date("2025-01-01"), + ) + assert_matches_type(BillingListInsightsResponse, billing, path=["response"]) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + async def test_method_list_insights_with_all_params(self, async_client: AsyncGradient) -> None: + billing = await async_client.billing.list_insights( + end_date=parse_date("2025-01-31"), + account_urn="do:team:12345678-1234-1234-1234-123456789012", + start_date=parse_date("2025-01-01"), + page=1, + per_page=1, + ) + assert_matches_type(BillingListInsightsResponse, billing, path=["response"]) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + async def test_raw_response_list_insights(self, async_client: AsyncGradient) -> None: + response = await async_client.billing.with_raw_response.list_insights( + end_date=parse_date("2025-01-31"), + account_urn="do:team:12345678-1234-1234-1234-123456789012", + start_date=parse_date("2025-01-01"), + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + billing = await response.parse() + assert_matches_type(BillingListInsightsResponse, billing, path=["response"]) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + async def test_streaming_response_list_insights(self, async_client: AsyncGradient) -> None: + async with async_client.billing.with_streaming_response.list_insights( + end_date=parse_date("2025-01-31"), + account_urn="do:team:12345678-1234-1234-1234-123456789012", + start_date=parse_date("2025-01-01"), + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + billing = await response.parse() + assert_matches_type(BillingListInsightsResponse, billing, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + async def test_path_params_list_insights(self, async_client: AsyncGradient) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `account_urn` but received ''"): + await async_client.billing.with_raw_response.list_insights( + end_date=parse_date("2025-01-31"), + account_urn="", + start_date=parse_date("2025-01-01"), + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `start_date` but received ''"): + await async_client.billing.with_raw_response.list_insights( + end_date=parse_date("2025-01-31"), + account_urn="do:team:12345678-1234-1234-1234-123456789012", + start_date="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `end_date` but received ''"): + await async_client.billing.with_raw_response.list_insights( + end_date="", + account_urn="do:team:12345678-1234-1234-1234-123456789012", + start_date=parse_date("2025-01-01"), + ) diff --git a/tests/api_resources/test_knowledge_bases.py b/tests/api_resources/test_knowledge_bases.py index 632951b4..9ce9785d 100644 --- a/tests/api_resources/test_knowledge_bases.py +++ b/tests/api_resources/test_knowledge_bases.py @@ -46,6 +46,13 @@ def 
test_method_create_with_all_params(self, client: Gradient) -> None: }, "bucket_name": "example name", "bucket_region": "example string", + "chunking_algorithm": "CHUNKING_ALGORITHM_SECTION_BASED", + "chunking_options": { + "child_chunk_size": 350, + "max_chunk_size": 750, + "parent_chunk_size": 1000, + "semantic_threshold": 0.5, + }, "dropbox_data_source": { "folder": "example string", "refresh_token": "example string", @@ -447,6 +454,13 @@ async def test_method_create_with_all_params(self, async_client: AsyncGradient) }, "bucket_name": "example name", "bucket_region": "example string", + "chunking_algorithm": "CHUNKING_ALGORITHM_SECTION_BASED", + "chunking_options": { + "child_chunk_size": 350, + "max_chunk_size": 750, + "parent_chunk_size": 1000, + "semantic_threshold": 0.5, + }, "dropbox_data_source": { "folder": "example string", "refresh_token": "example string", diff --git a/tests/api_resources/test_responses.py b/tests/api_resources/test_responses.py new file mode 100644 index 00000000..0d8d7acf --- /dev/null +++ b/tests/api_resources/test_responses.py @@ -0,0 +1,296 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from gradient import Gradient, AsyncGradient +from tests.utils import assert_matches_type +from gradient.types.shared import CreateResponseResponse + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestResponses: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + def test_method_create_overload_1(self, client: Gradient) -> None: + response = client.responses.create( + input="Tell me a three-sentence bedtime story about a unicorn.", + model="llama3-8b-instruct", + ) + assert_matches_type(CreateResponseResponse, response, path=["response"]) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + def test_method_create_with_all_params_overload_1(self, client: Gradient) -> None: + response = client.responses.create( + input="Tell me a three-sentence bedtime story about a unicorn.", + model="llama3-8b-instruct", + instructions="You are a helpful assistant.", + max_output_tokens=1024, + max_tokens=1024, + metadata={"foo": "string"}, + modalities=["text"], + parallel_tool_calls=True, + stop="\n", + stream=False, + stream_options={"include_usage": True}, + temperature=1, + tool_choice="none", + tools=[ + { + "type": "function", + "description": "description", + "name": "name", + "parameters": {"foo": "bar"}, + } + ], + top_p=1, + user="user-1234", + ) + assert_matches_type(CreateResponseResponse, response, path=["response"]) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + def test_raw_response_create_overload_1(self, client: Gradient) -> None: + http_response = client.responses.with_raw_response.create( + input="Tell me a three-sentence bedtime story about a unicorn.", + model="llama3-8b-instruct", + ) + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = http_response.parse() + assert_matches_type(CreateResponseResponse, response, path=["response"]) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + def test_streaming_response_create_overload_1(self, client: Gradient) -> None: + with client.responses.with_streaming_response.create( 
+ input="Tell me a three-sentence bedtime story about a unicorn.", + model="llama3-8b-instruct", + ) as http_response: + assert not http_response.is_closed + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = http_response.parse() + assert_matches_type(CreateResponseResponse, response, path=["response"]) + + assert cast(Any, http_response.is_closed) is True + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + def test_method_create_overload_2(self, client: Gradient) -> None: + response_stream = client.responses.create( + input="Tell me a three-sentence bedtime story about a unicorn.", + model="llama3-8b-instruct", + stream=True, + ) + response_stream.response.close() + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + def test_method_create_with_all_params_overload_2(self, client: Gradient) -> None: + response_stream = client.responses.create( + input="Tell me a three-sentence bedtime story about a unicorn.", + model="llama3-8b-instruct", + stream=True, + instructions="You are a helpful assistant.", + max_output_tokens=1024, + max_tokens=1024, + metadata={"foo": "string"}, + modalities=["text"], + parallel_tool_calls=True, + stop="\n", + stream_options={"include_usage": True}, + temperature=1, + tool_choice="none", + tools=[ + { + "type": "function", + "description": "description", + "name": "name", + "parameters": {"foo": "bar"}, + } + ], + top_p=1, + user="user-1234", + ) + response_stream.response.close() + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + def test_raw_response_create_overload_2(self, client: Gradient) -> None: + response = client.responses.with_raw_response.create( + input="Tell me a three-sentence bedtime story about a unicorn.", + model="llama3-8b-instruct", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + stream.close() + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + def test_streaming_response_create_overload_2(self, client: Gradient) -> None: + with client.responses.with_streaming_response.create( + input="Tell me a three-sentence bedtime story about a unicorn.", + model="llama3-8b-instruct", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = response.parse() + stream.close() + + assert cast(Any, response.is_closed) is True + + +class TestAsyncResponses: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + async def test_method_create_overload_1(self, async_client: AsyncGradient) -> None: + response = await async_client.responses.create( + input="Tell me a three-sentence bedtime story about a unicorn.", + model="llama3-8b-instruct", + ) + assert_matches_type(CreateResponseResponse, response, path=["response"]) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + async def test_method_create_with_all_params_overload_1(self, async_client: AsyncGradient) -> None: + response = await async_client.responses.create( + input="Tell me a three-sentence bedtime story about a unicorn.", + model="llama3-8b-instruct", + instructions="You are a helpful assistant.", + max_output_tokens=1024, + max_tokens=1024, + metadata={"foo": "string"}, + modalities=["text"], + 
parallel_tool_calls=True, + stop="\n", + stream=False, + stream_options={"include_usage": True}, + temperature=1, + tool_choice="none", + tools=[ + { + "type": "function", + "description": "description", + "name": "name", + "parameters": {"foo": "bar"}, + } + ], + top_p=1, + user="user-1234", + ) + assert_matches_type(CreateResponseResponse, response, path=["response"]) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + async def test_raw_response_create_overload_1(self, async_client: AsyncGradient) -> None: + http_response = await async_client.responses.with_raw_response.create( + input="Tell me a three-sentence bedtime story about a unicorn.", + model="llama3-8b-instruct", + ) + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = await http_response.parse() + assert_matches_type(CreateResponseResponse, response, path=["response"]) + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + async def test_streaming_response_create_overload_1(self, async_client: AsyncGradient) -> None: + async with async_client.responses.with_streaming_response.create( + input="Tell me a three-sentence bedtime story about a unicorn.", + model="llama3-8b-instruct", + ) as http_response: + assert not http_response.is_closed + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = await http_response.parse() + assert_matches_type(CreateResponseResponse, response, path=["response"]) + + assert cast(Any, http_response.is_closed) is True + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + async def test_method_create_overload_2(self, async_client: AsyncGradient) -> None: + response_stream = await async_client.responses.create( + input="Tell me a three-sentence bedtime story about a unicorn.", + model="llama3-8b-instruct", + stream=True, + ) + await response_stream.response.aclose() + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + async def test_method_create_with_all_params_overload_2(self, async_client: AsyncGradient) -> None: + response_stream = await async_client.responses.create( + input="Tell me a three-sentence bedtime story about a unicorn.", + model="llama3-8b-instruct", + stream=True, + instructions="You are a helpful assistant.", + max_output_tokens=1024, + max_tokens=1024, + metadata={"foo": "string"}, + modalities=["text"], + parallel_tool_calls=True, + stop="\n", + stream_options={"include_usage": True}, + temperature=1, + tool_choice="none", + tools=[ + { + "type": "function", + "description": "description", + "name": "name", + "parameters": {"foo": "bar"}, + } + ], + top_p=1, + user="user-1234", + ) + await response_stream.response.aclose() + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + async def test_raw_response_create_overload_2(self, async_client: AsyncGradient) -> None: + response = await async_client.responses.with_raw_response.create( + input="Tell me a three-sentence bedtime story about a unicorn.", + model="llama3-8b-instruct", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = await response.parse() + await stream.close() + + @pytest.mark.skip(reason="Prism tests are disabled") + @parametrize + async def test_streaming_response_create_overload_2(self, async_client: AsyncGradient) -> None: + async with async_client.responses.with_streaming_response.create( + input="Tell me a three-sentence bedtime story 
about a unicorn.", + model="llama3-8b-instruct", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = await response.parse() + await stream.close() + + assert cast(Any, response.is_closed) is True diff --git a/tests/test_client.py b/tests/test_client.py index 4b645c08..d5f1bbe6 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -8,10 +8,11 @@ import json import asyncio import inspect +import dataclasses import tracemalloc -from typing import Any, Union, cast +from typing import Any, Union, TypeVar, Callable, Iterable, Iterator, Optional, Coroutine, cast from unittest import mock -from typing_extensions import Literal +from typing_extensions import Literal, AsyncIterator, override import httpx import pytest @@ -41,6 +42,7 @@ from .utils import update_env +T = TypeVar("T") base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") access_token = "My Access Token" model_access_key = "My Model Access Key" @@ -57,11 +59,60 @@ def _low_retry_timeout(*_args: Any, **_kwargs: Any) -> float: return 0.1 +def mirror_request_content(request: httpx.Request) -> httpx.Response: + return httpx.Response(200, content=request.content) + + +# note: we can't use the httpx.MockTransport class as it consumes the request +# body itself, which means we can't test that the body is read lazily +class MockTransport(httpx.BaseTransport, httpx.AsyncBaseTransport): + def __init__( + self, + handler: Callable[[httpx.Request], httpx.Response] + | Callable[[httpx.Request], Coroutine[Any, Any, httpx.Response]], + ) -> None: + self.handler = handler + + @override + def handle_request( + self, + request: httpx.Request, + ) -> httpx.Response: + assert not inspect.iscoroutinefunction(self.handler), "handler must not be a coroutine function" + assert inspect.isfunction(self.handler), "handler must be a function" + return self.handler(request) + + @override + async def handle_async_request( + self, + request: httpx.Request, + ) -> httpx.Response: + assert inspect.iscoroutinefunction(self.handler), "handler must be a coroutine function" + return await self.handler(request) + + +@dataclasses.dataclass +class Counter: + value: int = 0 + + +def _make_sync_iterator(iterable: Iterable[T], counter: Optional[Counter] = None) -> Iterator[T]: + for item in iterable: + if counter: + counter.value += 1 + yield item + + +async def _make_async_iterator(iterable: Iterable[T], counter: Optional[Counter] = None) -> AsyncIterator[T]: + for item in iterable: + if counter: + counter.value += 1 + yield item + + def _get_open_connections(client: Gradient | AsyncGradient) -> int: transport = client._client._transport - assert isinstance(transport, httpx.HTTPTransport) or isinstance( - transport, httpx.AsyncHTTPTransport - ) + assert isinstance(transport, httpx.HTTPTransport) or isinstance(transport, httpx.AsyncHTTPTransport) pool = transport._pool return len(pool._requests) @@ -70,9 +121,7 @@ def _get_open_connections(client: Gradient | AsyncGradient) -> int: class TestGradient: @pytest.mark.respx(base_url=base_url) def test_raw_response(self, respx_mock: MockRouter, client: Gradient) -> None: - respx_mock.post("/foo").mock( - return_value=httpx.Response(200, json={"foo": "bar"}) - ) + respx_mock.post("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) response = client.post("/foo", cast_to=httpx.Response) assert response.status_code == 200 @@ -80,9 +129,7 @@ def test_raw_response(self, respx_mock: MockRouter, 
client: Gradient) -> None: assert response.json() == {"foo": "bar"} @pytest.mark.respx(base_url=base_url) - def test_raw_response_for_binary( - self, respx_mock: MockRouter, client: Gradient - ) -> None: + def test_raw_response_for_binary(self, respx_mock: MockRouter, client: Gradient) -> None: respx_mock.post("/foo").mock( return_value=httpx.Response( 200, @@ -231,9 +278,7 @@ def test_copy_signature(self, client: Gradient) -> None: continue copy_param = copy_signature.parameters.get(name) - assert ( - copy_param is not None - ), f"copy() signature is missing the {name} param" + assert copy_param is not None, f"copy() signature is missing the {name} param" @pytest.mark.skipif( sys.version_info >= (3, 10), @@ -269,9 +314,7 @@ def build_request(options: FinalRequestOptions) -> None: tracemalloc.stop() - def add_leak( - leaks: list[tracemalloc.StatisticDiff], diff: tracemalloc.StatisticDiff - ) -> None: + def add_leak(leaks: list[tracemalloc.StatisticDiff], diff: tracemalloc.StatisticDiff) -> None: if diff.count == 0: # Avoid false positives by considering only leaks (i.e. allocations that persist). return @@ -314,9 +357,7 @@ def test_request_timeout(self, client: Gradient) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == DEFAULT_TIMEOUT - request = client._build_request( - FinalRequestOptions(method="get", url="/foo", timeout=httpx.Timeout(100.0)) - ) + request = client._build_request(FinalRequestOptions(method="get", url="/foo", timeout=httpx.Timeout(100.0))) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == httpx.Timeout(100.0) @@ -348,9 +389,7 @@ def test_http_client_timeout_option(self) -> None: http_client=http_client, ) - request = client._build_request( - FinalRequestOptions(method="get", url="/foo") - ) + request = client._build_request(FinalRequestOptions(method="get", url="/foo")) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == httpx.Timeout(None) @@ -367,9 +406,7 @@ def test_http_client_timeout_option(self) -> None: http_client=http_client, ) - request = client._build_request( - FinalRequestOptions(method="get", url="/foo") - ) + request = client._build_request(FinalRequestOptions(method="get", url="/foo")) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == DEFAULT_TIMEOUT @@ -386,9 +423,7 @@ def test_http_client_timeout_option(self) -> None: http_client=http_client, ) - request = client._build_request( - FinalRequestOptions(method="get", url="/foo") - ) + request = client._build_request(FinalRequestOptions(method="get", url="/foo")) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == DEFAULT_TIMEOUT # our default @@ -415,9 +450,7 @@ def test_default_headers_option(self) -> None: _strict_response_validation=True, default_headers={"X-Foo": "bar"}, ) - request = test_client._build_request( - FinalRequestOptions(method="get", url="/foo") - ) + request = test_client._build_request(FinalRequestOptions(method="get", url="/foo")) assert request.headers.get("x-foo") == "bar" assert request.headers.get("x-stainless-lang") == "python" @@ -432,9 +465,7 @@ def test_default_headers_option(self) -> None: "X-Stainless-Lang": "my-overriding-header", }, ) - request = test_client2._build_request( - FinalRequestOptions(method="get", url="/foo") - ) + request = test_client2._build_request(FinalRequestOptions(method="get", url="/foo")) assert request.headers.get("x-foo") == "stainless" assert 
request.headers.get("x-stainless-lang") == "my-overriding-header" @@ -474,9 +505,7 @@ def test_validate_headers(self) -> None: client2._build_request(FinalRequestOptions(method="get", url="/foo")) request2 = client2._build_request( - FinalRequestOptions( - method="get", url="/foo", headers={"Authorization": Omit()} - ) + FinalRequestOptions(method="get", url="/foo", headers={"Authorization": Omit()}) ) assert request2.headers.get("Authorization") is None @@ -607,9 +636,7 @@ def test_multipart_repeating_array(self, client: Gradient) -> None: FinalRequestOptions.construct( method="post", url="/foo", - headers={ - "Content-Type": "multipart/form-data; boundary=6b7ba517decee4a450543ea6ae821c82" - }, + headers={"Content-Type": "multipart/form-data; boundary=6b7ba517decee4a450543ea6ae821c82"}, json_data={"array": ["foo", "bar"]}, files=[("foo.txt", b"hello world")], ) @@ -634,27 +661,85 @@ def test_multipart_repeating_array(self, client: Gradient) -> None: ] @pytest.mark.respx(base_url=base_url) - def test_basic_union_response( - self, respx_mock: MockRouter, client: Gradient - ) -> None: + def test_binary_content_upload(self, respx_mock: MockRouter, client: Gradient) -> None: + respx_mock.post("/upload").mock(side_effect=mirror_request_content) + + file_content = b"Hello, this is a test file." + + response = client.post( + "/upload", + content=file_content, + cast_to=httpx.Response, + options={"headers": {"Content-Type": "application/octet-stream"}}, + ) + + assert response.status_code == 200 + assert response.request.headers["Content-Type"] == "application/octet-stream" + assert response.content == file_content + + def test_binary_content_upload_with_iterator(self) -> None: + file_content = b"Hello, this is a test file." + counter = Counter() + iterator = _make_sync_iterator([file_content], counter=counter) + + def mock_handler(request: httpx.Request) -> httpx.Response: + assert counter.value == 0, "the request body should not have been read" + return httpx.Response(200, content=request.read()) + + with Gradient( + base_url=base_url, + access_token=access_token, + _strict_response_validation=True, + http_client=httpx.Client(transport=MockTransport(handler=mock_handler)), + ) as client: + response = client.post( + "/upload", + content=iterator, + cast_to=httpx.Response, + options={"headers": {"Content-Type": "application/octet-stream"}}, + ) + + assert response.status_code == 200 + assert response.request.headers["Content-Type"] == "application/octet-stream" + assert response.content == file_content + assert counter.value == 1 + + @pytest.mark.respx(base_url=base_url) + def test_binary_content_upload_with_body_is_deprecated(self, respx_mock: MockRouter, client: Gradient) -> None: + respx_mock.post("/upload").mock(side_effect=mirror_request_content) + + file_content = b"Hello, this is a test file." + + with pytest.deprecated_call( + match="Passing raw bytes as `body` is deprecated and will be removed in a future version. Please pass raw bytes via the `content` parameter instead." 
+ ): + response = client.post( + "/upload", + body=file_content, + cast_to=httpx.Response, + options={"headers": {"Content-Type": "application/octet-stream"}}, + ) + + assert response.status_code == 200 + assert response.request.headers["Content-Type"] == "application/octet-stream" + assert response.content == file_content + + @pytest.mark.respx(base_url=base_url) + def test_basic_union_response(self, respx_mock: MockRouter, client: Gradient) -> None: class Model1(BaseModel): name: str class Model2(BaseModel): foo: str - respx_mock.get("/foo").mock( - return_value=httpx.Response(200, json={"foo": "bar"}) - ) + respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) response = client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) assert isinstance(response, Model2) assert response.foo == "bar" @pytest.mark.respx(base_url=base_url) - def test_union_response_different_types( - self, respx_mock: MockRouter, client: Gradient - ) -> None: + def test_union_response_different_types(self, respx_mock: MockRouter, client: Gradient) -> None: """Union of objects with the same field name using a different type""" class Model1(BaseModel): @@ -663,9 +748,7 @@ class Model1(BaseModel): class Model2(BaseModel): foo: str - respx_mock.get("/foo").mock( - return_value=httpx.Response(200, json={"foo": "bar"}) - ) + respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) response = client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) assert isinstance(response, Model2) @@ -678,9 +761,7 @@ class Model2(BaseModel): assert response.foo == 1 @pytest.mark.respx(base_url=base_url) - def test_non_application_json_content_type_for_json_data( - self, respx_mock: MockRouter, client: Gradient - ) -> None: + def test_non_application_json_content_type_for_json_data(self, respx_mock: MockRouter, client: Gradient) -> None: """ Response that sets Content-Type to something other than application/json but returns json data """ @@ -850,15 +931,11 @@ def test_client_context_manager(self) -> None: assert test_client.is_closed() @pytest.mark.respx(base_url=base_url) - def test_client_response_validation_error( - self, respx_mock: MockRouter, client: Gradient - ) -> None: + def test_client_response_validation_error(self, respx_mock: MockRouter, client: Gradient) -> None: class Model(BaseModel): foo: str - respx_mock.get("/foo").mock( - return_value=httpx.Response(200, json={"foo": {"invalid": True}}) - ) + respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": {"invalid": True}})) with pytest.raises(APIResponseValidationError) as exc: client.get("/foo", cast_to=Model) @@ -881,13 +958,9 @@ def test_default_stream_cls(self, respx_mock: MockRouter, client: Gradient) -> N class Model(BaseModel): name: str - respx_mock.post("/foo").mock( - return_value=httpx.Response(200, json={"foo": "bar"}) - ) + respx_mock.post("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - stream = client.post( - "/foo", cast_to=Model, stream=True, stream_cls=Stream[Model] - ) + stream = client.post("/foo", cast_to=Model, stream=True, stream_cls=Stream[Model]) assert isinstance(stream, Stream) stream.response.close() @@ -896,9 +969,7 @@ def test_received_text_for_expected_json(self, respx_mock: MockRouter) -> None: class Model(BaseModel): name: str - respx_mock.get("/foo").mock( - return_value=httpx.Response(200, text="my-custom-format") - ) + respx_mock.get("/foo").mock(return_value=httpx.Response(200, text="my-custom-format")) strict_client = Gradient( 
base_url=base_url, @@ -959,16 +1030,10 @@ def test_parse_retry_after_header(self, remaining_retries: int, retry_after: str calculated = client._calculate_retry_timeout(remaining_retries, options, headers) assert calculated == pytest.approx(timeout, rel=0.5 * 0.875) # type: ignore[misc] - @mock.patch( - "gradient._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout - ) + @mock.patch("gradient._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) - def test_retrying_timeout_errors_doesnt_leak( - self, respx_mock: MockRouter, client: Gradient - ) -> None: - respx_mock.post("/chat/completions").mock( - side_effect=httpx.TimeoutException("Test timeout error") - ) + def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter, client: Gradient) -> None: + respx_mock.post("/chat/completions").mock(side_effect=httpx.TimeoutException("Test timeout error")) with pytest.raises(APITimeoutError): client.chat.completions.with_streaming_response.create( @@ -983,13 +1048,9 @@ def test_retrying_timeout_errors_doesnt_leak( assert _get_open_connections(client) == 0 - @mock.patch( - "gradient._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout - ) + @mock.patch("gradient._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) - def test_retrying_status_errors_doesnt_leak( - self, respx_mock: MockRouter, client: Gradient - ) -> None: + def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter, client: Gradient) -> None: respx_mock.post("/chat/completions").mock(return_value=httpx.Response(500)) with pytest.raises(APIStatusError): @@ -1005,9 +1066,7 @@ def test_retrying_status_errors_doesnt_leak( assert _get_open_connections(client) == 0 @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) - @mock.patch( - "gradient._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout - ) + @mock.patch("gradient._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) @pytest.mark.parametrize("failure_mode", ["status", "exception"]) def test_retries_taken( @@ -1043,15 +1102,10 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: ) assert response.retries_taken == failures_before_success - assert ( - int(response.http_request.headers.get("x-stainless-retry-count")) - == failures_before_success - ) + assert int(response.http_request.headers.get("x-stainless-retry-count")) == failures_before_success @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) - @mock.patch( - "gradient._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout - ) + @mock.patch("gradient._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) def test_omit_retry_count_header( self, client: Gradient, failures_before_success: int, respx_mock: MockRouter @@ -1080,14 +1134,10 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: extra_headers={"x-stainless-retry-count": Omit()}, ) - assert ( - len(response.http_request.headers.get_list("x-stainless-retry-count")) == 0 - ) + assert len(response.http_request.headers.get_list("x-stainless-retry-count")) == 0 @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) - @mock.patch( - "gradient._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout - ) + @mock.patch("gradient._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) 
@pytest.mark.respx(base_url=base_url) def test_overwrite_retry_count_header( self, client: Gradient, failures_before_success: int, respx_mock: MockRouter @@ -1144,29 +1194,19 @@ def test_default_client_creation(self) -> None: def test_follow_redirects(self, respx_mock: MockRouter, client: Gradient) -> None: # Test that the default follow_redirects=True allows following redirects respx_mock.post("/redirect").mock( - return_value=httpx.Response( - 302, headers={"Location": f"{base_url}/redirected"} - ) - ) - respx_mock.get("/redirected").mock( - return_value=httpx.Response(200, json={"status": "ok"}) + return_value=httpx.Response(302, headers={"Location": f"{base_url}/redirected"}) ) + respx_mock.get("/redirected").mock(return_value=httpx.Response(200, json={"status": "ok"})) - response = client.post( - "/redirect", body={"key": "value"}, cast_to=httpx.Response - ) + response = client.post("/redirect", body={"key": "value"}, cast_to=httpx.Response) assert response.status_code == 200 assert response.json() == {"status": "ok"} @pytest.mark.respx(base_url=base_url) - def test_follow_redirects_disabled( - self, respx_mock: MockRouter, client: Gradient - ) -> None: + def test_follow_redirects_disabled(self, respx_mock: MockRouter, client: Gradient) -> None: # Test that follow_redirects=False prevents following redirects respx_mock.post("/redirect").mock( - return_value=httpx.Response( - 302, headers={"Location": f"{base_url}/redirected"} - ) + return_value=httpx.Response(302, headers={"Location": f"{base_url}/redirected"}) ) with pytest.raises(APIStatusError) as exc_info: @@ -1183,12 +1223,8 @@ def test_follow_redirects_disabled( class TestAsyncGradient: @pytest.mark.respx(base_url=base_url) - async def test_raw_response( - self, respx_mock: MockRouter, async_client: AsyncGradient - ) -> None: - respx_mock.post("/foo").mock( - return_value=httpx.Response(200, json={"foo": "bar"}) - ) + async def test_raw_response(self, respx_mock: MockRouter, async_client: AsyncGradient) -> None: + respx_mock.post("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) response = await async_client.post("/foo", cast_to=httpx.Response) assert response.status_code == 200 @@ -1196,9 +1232,7 @@ async def test_raw_response( assert response.json() == {"foo": "bar"} @pytest.mark.respx(base_url=base_url) - async def test_raw_response_for_binary( - self, respx_mock: MockRouter, async_client: AsyncGradient - ) -> None: + async def test_raw_response_for_binary(self, respx_mock: MockRouter, async_client: AsyncGradient) -> None: respx_mock.post("/foo").mock( return_value=httpx.Response( 200, @@ -1347,9 +1381,7 @@ def test_copy_signature(self, async_client: AsyncGradient) -> None: continue copy_param = copy_signature.parameters.get(name) - assert ( - copy_param is not None - ), f"copy() signature is missing the {name} param" + assert copy_param is not None, f"copy() signature is missing the {name} param" @pytest.mark.skipif( sys.version_info >= (3, 10), @@ -1385,9 +1417,7 @@ def build_request(options: FinalRequestOptions) -> None: tracemalloc.stop() - def add_leak( - leaks: list[tracemalloc.StatisticDiff], diff: tracemalloc.StatisticDiff - ) -> None: + def add_leak(leaks: list[tracemalloc.StatisticDiff], diff: tracemalloc.StatisticDiff) -> None: if diff.count == 0: # Avoid false positives by considering only leaks (i.e. allocations that persist). 
return @@ -1426,9 +1456,7 @@ def add_leak( raise AssertionError() async def test_request_timeout(self, async_client: AsyncGradient) -> None: - request = async_client._build_request( - FinalRequestOptions(method="get", url="/foo") - ) + request = async_client._build_request(FinalRequestOptions(method="get", url="/foo")) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == DEFAULT_TIMEOUT @@ -1466,9 +1494,7 @@ async def test_http_client_timeout_option(self) -> None: http_client=http_client, ) - request = client._build_request( - FinalRequestOptions(method="get", url="/foo") - ) + request = client._build_request(FinalRequestOptions(method="get", url="/foo")) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == httpx.Timeout(None) @@ -1485,9 +1511,7 @@ async def test_http_client_timeout_option(self) -> None: http_client=http_client, ) - request = client._build_request( - FinalRequestOptions(method="get", url="/foo") - ) + request = client._build_request(FinalRequestOptions(method="get", url="/foo")) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == DEFAULT_TIMEOUT @@ -1504,9 +1528,7 @@ async def test_http_client_timeout_option(self) -> None: http_client=http_client, ) - request = client._build_request( - FinalRequestOptions(method="get", url="/foo") - ) + request = client._build_request(FinalRequestOptions(method="get", url="/foo")) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == DEFAULT_TIMEOUT # our default @@ -1533,9 +1555,7 @@ async def test_default_headers_option(self) -> None: _strict_response_validation=True, default_headers={"X-Foo": "bar"}, ) - request = test_client._build_request( - FinalRequestOptions(method="get", url="/foo") - ) + request = test_client._build_request(FinalRequestOptions(method="get", url="/foo")) assert request.headers.get("x-foo") == "bar" assert request.headers.get("x-stainless-lang") == "python" @@ -1550,9 +1570,7 @@ async def test_default_headers_option(self) -> None: "X-Stainless-Lang": "my-overriding-header", }, ) - request = test_client2._build_request( - FinalRequestOptions(method="get", url="/foo") - ) + request = test_client2._build_request(FinalRequestOptions(method="get", url="/foo")) assert request.headers.get("x-foo") == "stainless" assert request.headers.get("x-stainless-lang") == "my-overriding-header" @@ -1592,9 +1610,7 @@ def test_validate_headers(self) -> None: client2._build_request(FinalRequestOptions(method="get", url="/foo")) request2 = client2._build_request( - FinalRequestOptions( - method="get", url="/foo", headers={"Authorization": Omit()} - ) + FinalRequestOptions(method="get", url="/foo", headers={"Authorization": Omit()}) ) assert request2.headers.get("Authorization") is None @@ -1725,9 +1741,7 @@ def test_multipart_repeating_array(self, async_client: AsyncGradient) -> None: FinalRequestOptions.construct( method="post", url="/foo", - headers={ - "Content-Type": "multipart/form-data; boundary=6b7ba517decee4a450543ea6ae821c82" - }, + headers={"Content-Type": "multipart/form-data; boundary=6b7ba517decee4a450543ea6ae821c82"}, json_data={"array": ["foo", "bar"]}, files=[("foo.txt", b"hello world")], ) @@ -1752,29 +1766,87 @@ def test_multipart_repeating_array(self, async_client: AsyncGradient) -> None: ] @pytest.mark.respx(base_url=base_url) - async def test_basic_union_response( + async def test_binary_content_upload(self, respx_mock: MockRouter, async_client: AsyncGradient) -> None: + 
respx_mock.post("/upload").mock(side_effect=mirror_request_content) + + file_content = b"Hello, this is a test file." + + response = await async_client.post( + "/upload", + content=file_content, + cast_to=httpx.Response, + options={"headers": {"Content-Type": "application/octet-stream"}}, + ) + + assert response.status_code == 200 + assert response.request.headers["Content-Type"] == "application/octet-stream" + assert response.content == file_content + + async def test_binary_content_upload_with_asynciterator(self) -> None: + file_content = b"Hello, this is a test file." + counter = Counter() + iterator = _make_async_iterator([file_content], counter=counter) + + async def mock_handler(request: httpx.Request) -> httpx.Response: + assert counter.value == 0, "the request body should not have been read" + return httpx.Response(200, content=await request.aread()) + + async with AsyncGradient( + base_url=base_url, + access_token=access_token, + _strict_response_validation=True, + http_client=httpx.AsyncClient(transport=MockTransport(handler=mock_handler)), + ) as client: + response = await client.post( + "/upload", + content=iterator, + cast_to=httpx.Response, + options={"headers": {"Content-Type": "application/octet-stream"}}, + ) + + assert response.status_code == 200 + assert response.request.headers["Content-Type"] == "application/octet-stream" + assert response.content == file_content + assert counter.value == 1 + + @pytest.mark.respx(base_url=base_url) + async def test_binary_content_upload_with_body_is_deprecated( self, respx_mock: MockRouter, async_client: AsyncGradient ) -> None: + respx_mock.post("/upload").mock(side_effect=mirror_request_content) + + file_content = b"Hello, this is a test file." + + with pytest.deprecated_call( + match="Passing raw bytes as `body` is deprecated and will be removed in a future version. Please pass raw bytes via the `content` parameter instead." 
+ ): + response = await async_client.post( + "/upload", + body=file_content, + cast_to=httpx.Response, + options={"headers": {"Content-Type": "application/octet-stream"}}, + ) + + assert response.status_code == 200 + assert response.request.headers["Content-Type"] == "application/octet-stream" + assert response.content == file_content + + @pytest.mark.respx(base_url=base_url) + async def test_basic_union_response(self, respx_mock: MockRouter, async_client: AsyncGradient) -> None: class Model1(BaseModel): name: str class Model2(BaseModel): foo: str - respx_mock.get("/foo").mock( - return_value=httpx.Response(200, json={"foo": "bar"}) - ) + respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - response = await async_client.get( - "/foo", cast_to=cast(Any, Union[Model1, Model2]) - ) + response = await async_client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) assert isinstance(response, Model2) assert response.foo == "bar" @pytest.mark.respx(base_url=base_url) - async def test_union_response_different_types( - self, respx_mock: MockRouter, async_client: AsyncGradient - ) -> None: + async def test_union_response_different_types(self, respx_mock: MockRouter, async_client: AsyncGradient) -> None: """Union of objects with the same field name using a different type""" class Model1(BaseModel): @@ -1783,21 +1855,15 @@ class Model1(BaseModel): class Model2(BaseModel): foo: str - respx_mock.get("/foo").mock( - return_value=httpx.Response(200, json={"foo": "bar"}) - ) + respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - response = await async_client.get( - "/foo", cast_to=cast(Any, Union[Model1, Model2]) - ) + response = await async_client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) assert isinstance(response, Model2) assert response.foo == "bar" respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": 1})) - response = await async_client.get( - "/foo", cast_to=cast(Any, Union[Model1, Model2]) - ) + response = await async_client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) assert isinstance(response, Model1) assert response.foo == 1 @@ -1979,15 +2045,11 @@ async def test_client_context_manager(self) -> None: assert test_client.is_closed() @pytest.mark.respx(base_url=base_url) - async def test_client_response_validation_error( - self, respx_mock: MockRouter, async_client: AsyncGradient - ) -> None: + async def test_client_response_validation_error(self, respx_mock: MockRouter, async_client: AsyncGradient) -> None: class Model(BaseModel): foo: str - respx_mock.get("/foo").mock( - return_value=httpx.Response(200, json={"foo": {"invalid": True}}) - ) + respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": {"invalid": True}})) with pytest.raises(APIResponseValidationError) as exc: await async_client.get("/foo", cast_to=Model) @@ -2006,32 +2068,22 @@ async def test_client_max_retries_validation(self) -> None: ) @pytest.mark.respx(base_url=base_url) - async def test_default_stream_cls( - self, respx_mock: MockRouter, async_client: AsyncGradient - ) -> None: + async def test_default_stream_cls(self, respx_mock: MockRouter, async_client: AsyncGradient) -> None: class Model(BaseModel): name: str - respx_mock.post("/foo").mock( - return_value=httpx.Response(200, json={"foo": "bar"}) - ) + respx_mock.post("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - stream = await async_client.post( - "/foo", cast_to=Model, stream=True, stream_cls=AsyncStream[Model] - ) + stream 
= await async_client.post("/foo", cast_to=Model, stream=True, stream_cls=AsyncStream[Model]) assert isinstance(stream, AsyncStream) await stream.response.aclose() @pytest.mark.respx(base_url=base_url) - async def test_received_text_for_expected_json( - self, respx_mock: MockRouter - ) -> None: + async def test_received_text_for_expected_json(self, respx_mock: MockRouter) -> None: class Model(BaseModel): name: str - respx_mock.get("/foo").mock( - return_value=httpx.Response(200, text="my-custom-format") - ) + respx_mock.get("/foo").mock(return_value=httpx.Response(200, text="my-custom-format")) strict_client = AsyncGradient( base_url=base_url, @@ -2095,16 +2147,12 @@ async def test_parse_retry_after_header(self, remaining_retries: int, retry_afte calculated = async_client._calculate_retry_timeout(remaining_retries, options, headers) assert calculated == pytest.approx(timeout, rel=0.5 * 0.875) # type: ignore[misc] - @mock.patch( - "gradient._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout - ) + @mock.patch("gradient._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) async def test_retrying_timeout_errors_doesnt_leak( self, respx_mock: MockRouter, async_client: AsyncGradient ) -> None: - respx_mock.post("/chat/completions").mock( - side_effect=httpx.TimeoutException("Test timeout error") - ) + respx_mock.post("/chat/completions").mock(side_effect=httpx.TimeoutException("Test timeout error")) with pytest.raises(APITimeoutError): await async_client.chat.completions.with_streaming_response.create( @@ -2119,9 +2167,7 @@ async def test_retrying_timeout_errors_doesnt_leak( assert _get_open_connections(async_client) == 0 - @mock.patch( - "gradient._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout - ) + @mock.patch("gradient._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) async def test_retrying_status_errors_doesnt_leak( self, respx_mock: MockRouter, async_client: AsyncGradient @@ -2141,9 +2187,7 @@ async def test_retrying_status_errors_doesnt_leak( assert _get_open_connections(async_client) == 0 @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) - @mock.patch( - "gradient._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout - ) + @mock.patch("gradient._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) @pytest.mark.parametrize("failure_mode", ["status", "exception"]) async def test_retries_taken( @@ -2179,15 +2223,10 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: ) assert response.retries_taken == failures_before_success - assert ( - int(response.http_request.headers.get("x-stainless-retry-count")) - == failures_before_success - ) + assert int(response.http_request.headers.get("x-stainless-retry-count")) == failures_before_success @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) - @mock.patch( - "gradient._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout - ) + @mock.patch("gradient._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) async def test_omit_retry_count_header( self, @@ -2219,14 +2258,10 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: extra_headers={"x-stainless-retry-count": Omit()}, ) - assert ( - len(response.http_request.headers.get_list("x-stainless-retry-count")) == 0 - ) + assert 
len(response.http_request.headers.get_list("x-stainless-retry-count")) == 0 @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) - @mock.patch( - "gradient._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout - ) + @mock.patch("gradient._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) async def test_overwrite_retry_count_header( self, @@ -2264,9 +2299,7 @@ async def test_get_platform(self) -> None: platform = await asyncify(get_platform)() assert isinstance(platform, (str, OtherPlatform)) - async def test_proxy_environment_variables( - self, monkeypatch: pytest.MonkeyPatch - ) -> None: + async def test_proxy_environment_variables(self, monkeypatch: pytest.MonkeyPatch) -> None: # Test that the proxy environment variables are set correctly monkeypatch.setenv("HTTPS_PROXY", "https://example.org") @@ -2289,34 +2322,22 @@ async def test_default_client_creation(self) -> None: ) @pytest.mark.respx(base_url=base_url) - async def test_follow_redirects( - self, respx_mock: MockRouter, async_client: AsyncGradient - ) -> None: + async def test_follow_redirects(self, respx_mock: MockRouter, async_client: AsyncGradient) -> None: # Test that the default follow_redirects=True allows following redirects respx_mock.post("/redirect").mock( - return_value=httpx.Response( - 302, headers={"Location": f"{base_url}/redirected"} - ) - ) - respx_mock.get("/redirected").mock( - return_value=httpx.Response(200, json={"status": "ok"}) + return_value=httpx.Response(302, headers={"Location": f"{base_url}/redirected"}) ) + respx_mock.get("/redirected").mock(return_value=httpx.Response(200, json={"status": "ok"})) - response = await async_client.post( - "/redirect", body={"key": "value"}, cast_to=httpx.Response - ) + response = await async_client.post("/redirect", body={"key": "value"}, cast_to=httpx.Response) assert response.status_code == 200 assert response.json() == {"status": "ok"} @pytest.mark.respx(base_url=base_url) - async def test_follow_redirects_disabled( - self, respx_mock: MockRouter, async_client: AsyncGradient - ) -> None: + async def test_follow_redirects_disabled(self, respx_mock: MockRouter, async_client: AsyncGradient) -> None: # Test that follow_redirects=False prevents following redirects respx_mock.post("/redirect").mock( - return_value=httpx.Response( - 302, headers={"Location": f"{base_url}/redirected"} - ) + return_value=httpx.Response(302, headers={"Location": f"{base_url}/redirected"}) ) with pytest.raises(APIStatusError) as exc_info: diff --git a/tests/test_utils/test_json.py b/tests/test_utils/test_json.py new file mode 100644 index 00000000..4ba6d83c --- /dev/null +++ b/tests/test_utils/test_json.py @@ -0,0 +1,126 @@ +from __future__ import annotations + +import datetime +from typing import Union + +import pydantic + +from gradient import _compat +from gradient._utils._json import openapi_dumps + + +class TestOpenapiDumps: + def test_basic(self) -> None: + data = {"key": "value", "number": 42} + json_bytes = openapi_dumps(data) + assert json_bytes == b'{"key":"value","number":42}' + + def test_datetime_serialization(self) -> None: + dt = datetime.datetime(2023, 1, 1, 12, 0, 0) + data = {"datetime": dt} + json_bytes = openapi_dumps(data) + assert json_bytes == b'{"datetime":"2023-01-01T12:00:00"}' + + def test_pydantic_model_serialization(self) -> None: + class User(pydantic.BaseModel): + first_name: str + last_name: str + age: int + + model_instance = User(first_name="John", last_name="Kramer", age=83) + 
data = {"model": model_instance} + json_bytes = openapi_dumps(data) + assert json_bytes == b'{"model":{"first_name":"John","last_name":"Kramer","age":83}}' + + def test_pydantic_model_with_default_values(self) -> None: + class User(pydantic.BaseModel): + name: str + role: str = "user" + active: bool = True + score: int = 0 + + model_instance = User(name="Alice") + data = {"model": model_instance} + json_bytes = openapi_dumps(data) + assert json_bytes == b'{"model":{"name":"Alice"}}' + + def test_pydantic_model_with_default_values_overridden(self) -> None: + class User(pydantic.BaseModel): + name: str + role: str = "user" + active: bool = True + + model_instance = User(name="Bob", role="admin", active=False) + data = {"model": model_instance} + json_bytes = openapi_dumps(data) + assert json_bytes == b'{"model":{"name":"Bob","role":"admin","active":false}}' + + def test_pydantic_model_with_alias(self) -> None: + class User(pydantic.BaseModel): + first_name: str = pydantic.Field(alias="firstName") + last_name: str = pydantic.Field(alias="lastName") + + model_instance = User(firstName="John", lastName="Doe") + data = {"model": model_instance} + json_bytes = openapi_dumps(data) + assert json_bytes == b'{"model":{"firstName":"John","lastName":"Doe"}}' + + def test_pydantic_model_with_alias_and_default(self) -> None: + class User(pydantic.BaseModel): + user_name: str = pydantic.Field(alias="userName") + user_role: str = pydantic.Field(default="member", alias="userRole") + is_active: bool = pydantic.Field(default=True, alias="isActive") + + model_instance = User(userName="charlie") + data = {"model": model_instance} + json_bytes = openapi_dumps(data) + assert json_bytes == b'{"model":{"userName":"charlie"}}' + + model_with_overrides = User(userName="diana", userRole="admin", isActive=False) + data = {"model": model_with_overrides} + json_bytes = openapi_dumps(data) + assert json_bytes == b'{"model":{"userName":"diana","userRole":"admin","isActive":false}}' + + def test_pydantic_model_with_nested_models_and_defaults(self) -> None: + class Address(pydantic.BaseModel): + street: str + city: str = "Unknown" + + class User(pydantic.BaseModel): + name: str + address: Address + verified: bool = False + + if _compat.PYDANTIC_V1: + # to handle forward references in Pydantic v1 + User.update_forward_refs(**locals()) # type: ignore[reportDeprecated] + + address = Address(street="123 Main St") + user = User(name="Diana", address=address) + data = {"user": user} + json_bytes = openapi_dumps(data) + assert json_bytes == b'{"user":{"name":"Diana","address":{"street":"123 Main St"}}}' + + address_with_city = Address(street="456 Oak Ave", city="Boston") + user_verified = User(name="Eve", address=address_with_city, verified=True) + data = {"user": user_verified} + json_bytes = openapi_dumps(data) + assert ( + json_bytes == b'{"user":{"name":"Eve","address":{"street":"456 Oak Ave","city":"Boston"},"verified":true}}' + ) + + def test_pydantic_model_with_optional_fields(self) -> None: + class User(pydantic.BaseModel): + name: str + email: Union[str, None] + phone: Union[str, None] + + model_with_none = User(name="Eve", email=None, phone=None) + data = {"model": model_with_none} + json_bytes = openapi_dumps(data) + assert json_bytes == b'{"model":{"name":"Eve","email":null,"phone":null}}' + + model_with_values = User(name="Frank", email="frank@example.com", phone=None) + data = {"model": model_with_values} + json_bytes = openapi_dumps(data) + assert json_bytes == 
b'{"model":{"name":"Frank","email":"frank@example.com","phone":null}}'