Skip to content

Commit 22a7ac9

Browse files
authored
STT: list and get files (#655)
1 parent c944330 commit 22a7ac9

7 files changed

Lines changed: 567 additions & 3 deletions

File tree

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Retrieve metadata for a specific audio file by its ID.
2+
3+
Returns detailed information about an uploaded audio file, including its S3 location, size, content type, and timestamps. If ``include_signed_url=True`` is set, the response also includes a pre-signed S3 URL for direct access to the file.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
List all audio files for the current project.
2+
3+
Returns metadata for all uploaded audio files, including file IDs, filenames, sizes, and content types. If ``include_signed_url=True`` is set, each file in the list also includes a pre-signed S3 URL for direct file access.

backend/app/api/routes/stt_evaluations/files.py

Lines changed: 91 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,15 @@
22

33
import logging
44

5-
from fastapi import APIRouter, Depends, File, UploadFile
5+
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, Query
66

77
from app.api.deps import AuthContextDep, SessionDep
8+
from app.core.cloud import get_cloud_storage
89
from app.api.permissions import Permission, require_permission
9-
from app.models.file import AudioUploadResponse
10+
from app.models.file import AudioUploadResponse, FilePublic
11+
from app.crud.file import get_file_by_id, list_files
1012
from app.services.stt_evaluations.audio import upload_audio_file
13+
from app.services.stt_evaluations.helpers import build_file_schema, build_file_schemas
1114
from app.utils import APIResponse, load_description
1215

1316
logger = logging.getLogger(__name__)
@@ -41,3 +44,89 @@ def upload_audio(
4144
)
4245

4346
return APIResponse.success_response(data=result)
47+
48+
49+
@router.get(
    "/files",
    response_model=APIResponse[list[FilePublic]],
    dependencies=[Depends(require_permission(Permission.REQUIRE_PROJECT))],
    summary="List audio files",
    description=load_description("stt_evaluation/list_audios.md"),
)
def list_audio(
    session: SessionDep,
    auth_context: AuthContextDep,
    include_signed_url: bool = Query(
        False, description="Include a signed URL to access the audio file"
    ),
) -> APIResponse[list[FilePublic]]:
    """List the audio files of the project in the auth context.

    When ``include_signed_url`` is true, a cloud storage client is obtained
    for the project and handed to ``build_file_schemas`` so that each entry
    can carry a signed URL.
    """
    logger.info(
        f"[list_audio] Listing audio files | "
        f"project_id: {auth_context.project_.id}, "
        f"include_signed_url: {include_signed_url}"
    )

    # A storage client is only needed when signed URLs were requested.
    storage = (
        get_cloud_storage(session=session, project_id=auth_context.project_.id)
        if include_signed_url
        else None
    )

    audio_files = list_files(
        session=session,
        organization_id=auth_context.organization_.id,
        project_id=auth_context.project_.id,
    )

    return APIResponse.success_response(
        data=build_file_schemas(
            files=audio_files,
            include_signed_url=include_signed_url,
            storage=storage,
        )
    )
88+
89+
90+
@router.get(
    "/files/{file_id}",
    response_model=APIResponse[FilePublic],
    dependencies=[Depends(require_permission(Permission.REQUIRE_PROJECT))],
    summary="Get audio file by ID",
    description=load_description("stt_evaluation/get_audio.md"),
)
def get_audio(
    session: SessionDep,
    auth_context: AuthContextDep,
    file_id: int,
    include_signed_url: bool = Query(
        False, description="Include a signed URL to access the audio file"
    ),
) -> APIResponse[FilePublic]:
    """Return one audio file, looked up by ID within the caller's scope.

    The lookup is restricted to the organization and project taken from the
    auth context. When ``include_signed_url`` is true, a cloud storage client
    is obtained and passed to ``build_file_schema`` so the response can carry
    a signed URL.

    Raises:
        HTTPException: 404 when no matching file is found.
    """
    logger.info(
        f"[get_audio] Getting audio file | "
        f"project_id: {auth_context.project_.id}, file_id: {file_id}, "
        f"include_signed_url: {include_signed_url}"
    )

    audio_file = get_file_by_id(
        session=session,
        file_id=file_id,
        organization_id=auth_context.organization_.id,
        project_id=auth_context.project_.id,
    )
    if not audio_file:
        raise HTTPException(status_code=404, detail=f"File with ID {file_id} not found")

    # The storage client is fetched only after the file is known to exist,
    # and only when a signed URL was requested.
    storage = (
        get_cloud_storage(session=session, project_id=auth_context.project_.id)
        if include_signed_url
        else None
    )

    return APIResponse.success_response(
        data=build_file_schema(
            file=audio_file,
            include_signed_url=include_signed_url,
            storage=storage,
        )
    )

backend/app/crud/file.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from app.core.util import now
88
from app.models.file import File, FileType
99

10+
1011
logger = logging.getLogger(__name__)
1112

1213

@@ -122,3 +123,28 @@ def get_files_by_ids(
122123
)
123124

124125
return list(session.exec(statement).all())
126+
127+
128+
def list_files(
    *,
    session: Session,
    organization_id: int,
    project_id: int,
) -> list[File]:
    """Fetch every audio file record for an organization/project pair.

    Only rows whose ``file_type`` equals ``FileType.AUDIO.value`` are
    returned; no ordering is applied to the query.

    Args:
        session: Database session
        organization_id: Organization ID
        project_id: Project ID

    Returns:
        list[File]: All matching audio file records (empty if none match)
    """
    # Successive .where() calls are combined with AND.
    query = (
        select(File)
        .where(File.organization_id == organization_id)
        .where(File.project_id == project_id)
        .where(File.file_type == FileType.AUDIO.value)
    )
    rows = session.exec(query).all()
    return list(rows)

backend/app/models/file.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ class FilePublic(SQLModel):
9797

9898
id: int
9999
object_store_url: str
100+
signed_url: str | None = None
100101
filename: str
101102
size_bytes: int
102103
content_type: str
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
"""Helper functions for building audio file response schemas with signed URLs."""
2+
3+
import logging
4+
from typing import Iterable
5+
6+
from app.models.file import File, FilePublic
7+
8+
logger = logging.getLogger(__name__)
9+
10+
11+
def build_file_schema(
    *,
    file: File,
    include_signed_url: bool,
    storage: object | None,
) -> FilePublic:
    """Build a single file schema, optionally attaching a signed URL.

    Signed URL generation is best-effort: if the storage call raises, the
    failure is logged as a warning (including the error) and the schema is
    returned with ``signed_url`` set to ``None``.

    Args:
        file: The File database model instance
        include_signed_url: Whether to generate and include a signed URL
        storage: Cloud storage instance for generating signed URLs; must
            expose ``get_signed_url(url)``. Ignored when falsy.

    Returns:
        FilePublic schema with optional signed_url
    """
    schema = FilePublic.model_validate(file, from_attributes=True)
    if include_signed_url and storage:
        try:
            schema.signed_url = storage.get_signed_url(file.object_store_url)
        except Exception as e:
            # Deliberately broad: one failed URL must not fail the whole
            # response. The cause is logged so the failure stays diagnosable.
            logger.warning(
                f"[build_file_schema] Failed to generate signed URL for file {file.id}"
                f" | error: {e}"
            )
            schema.signed_url = None
    return schema
37+
38+
39+
def build_file_schemas(
    *,
    files: Iterable[File],
    include_signed_url: bool,
    storage: object | None,
) -> list[FilePublic]:
    """Build file schemas for several files, optionally attaching signed URLs.

    Each file is converted by ``build_file_schema``, so single-file and
    multi-file responses share one conversion and error-handling path. A
    failure to sign one file's URL leaves that entry's ``signed_url`` as
    ``None`` and does not affect the other entries.

    Args:
        files: Iterable of File database model instances
        include_signed_url: Whether to generate and include signed URLs
        storage: Cloud storage instance for generating signed URLs

    Returns:
        List of FilePublic schemas with optional signed_url, in input order
    """
    return [
        build_file_schema(
            file=file,
            include_signed_url=include_signed_url,
            storage=storage,
        )
        for file in files
    ]

0 commit comments

Comments
 (0)