Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,61 @@ def from_common(
)


class NDVideoTextAnswer(BaseModel):
    """One text value of a video text classification plus the frame ranges it covers."""

    # The free-text answer itself.
    value: str
    # Frame ranges for this value; callers in this change pass dicts shaped
    # like {"start": <int>, "end": <int>}.
    frames: List[Dict[str, int]]

    # Let the model be populated by field name as well as by alias.
    model_config = ConfigDict(populate_by_name=True)


class NDVideoText(BaseModel):
"""Video text classification with per-segment text values and frame ranges.

Produces NDJSON like:
{"name": "...", "answer": [{"value": "text", "frames": [{"start": 1, "end": 5}]}], ...}
"""

name: Optional[str] = None
schema_id: Optional[str] = Field(default=None, alias="schemaId")
answer: List[NDVideoTextAnswer]
data_row: DataRow = Field(alias="dataRow")

model_config = ConfigDict(populate_by_name=True)

@model_validator(mode="after")
def must_set_one(self):
    """Reject instances that carry neither a name nor a schema id.

    Runs after field validation and returns the instance unchanged when at
    least one of ``name`` / ``schema_id`` is truthy.

    Raises:
        ValueError: if both ``name`` and ``schema_id`` are empty or None.
    """
    has_identifier = bool(self.name or self.schema_id)
    if has_identifier:
        return self
    raise ValueError("Schema id or name are not set. Set either one.")

@model_serializer(mode="wrap")
def serialize_model(self, handler):
    """Serialize via the wrapped handler, omitting unset identifier keys.

    ``handler`` produces the default serialized mapping; the ``"name"`` and
    ``"schemaId"`` entries are removed from it when their value is None so
    that only the identifier actually provided appears in the output.
    """
    payload = handler(self)
    for key in ("name", "schemaId"):
        # Only drop keys that are present and explicitly None.
        if key in payload and payload[key] is None:
            del payload[key]
    return payload

@classmethod
def from_video_text_group(
cls,
annotation_group: List["VideoClassificationAnnotation"],
frame_ranges_by_text: Dict[str, List[Dict[str, int]]],
data: "GenericDataRowData",
) -> "NDVideoText":
first = annotation_group[0]
return cls(
name=first.name,
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NDVideoText drops feature_schema_id and requires name

Medium Severity

NDVideoText declares name: str (required, non-optional) and has no schema_id/feature_schema_id field. FeatureSchema allows name=None when feature_schema_id is set, and all other annotation types go through NDAnnotation which accepts either. With this change, a VideoClassificationAnnotation using only feature_schema_id as its identifier will crash at NDVideoText(name=None, …) with a Pydantic validation error. Even when both are provided, feature_schema_id is silently dropped from the serialized output, unlike the NDText/NDRadio/NDChecklist paths that preserve it as schemaId.

Additional Locations (1)

Fix in Cursor Fix in Web

schema_id=first.feature_schema_id,
data_row=DataRow(id=data.uid, global_key=data.global_key),
answer=[
NDVideoTextAnswer(value=text_val, frames=ranges)
for text_val, ranges in frame_ranges_by_text.items()
],
)


class NDPromptTextSubclass(NDAnswer):
answer: str

Expand Down Expand Up @@ -517,6 +572,7 @@ def from_common(
NDRadioSubclass.model_rebuild()
NDRadio.model_rebuild()
NDText.model_rebuild()
NDVideoText.model_rebuild()
NDPromptText.model_rebuild()
NDTextSubclass.model_rebuild()

Expand Down
34 changes: 31 additions & 3 deletions libs/labelbox/src/labelbox/data/serialization/ndjson/label.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
)
from .temporal import create_temporal_ndjson_classifications
from labelbox.types import DocumentRectangle, DocumentEntity
from ...annotation_types.classification.classification import Text
from .classification import (
NDChecklistSubclass,
NDClassification,
Expand All @@ -39,6 +40,7 @@
NDPromptClassificationType,
NDPromptText,
NDRadioSubclass,
NDVideoText,
)
from .metric import NDConfusionMatrixMetric, NDMetricAnnotation, NDScalarMetric
from .mmc import NDMessageTask
Expand All @@ -61,6 +63,7 @@
NDRelationship,
NDPromptText,
NDMessageTask,
NDVideoText,
]


Expand Down Expand Up @@ -142,18 +145,43 @@ def _create_video_annotations(
yield NDObject.from_common(annotation=annot, data=label.data)

for annotation_group in video_annotations.values():
segment_frame_ranges = cls._get_segment_frame_ranges(
annotation_group
)
if isinstance(annotation_group[0], VideoClassificationAnnotation):
annotation = annotation_group[0]

if isinstance(annotation.value, Text):
by_text = defaultdict(list)
for ann in annotation_group:
by_text[ann.value.answer].append(ann)

frame_ranges_by_text = {}
for text_val, anns in sorted(
by_text.items(),
key=lambda x: min(a.frame for a in x[1]),
):
ranges = [
{"start": s, "end": e}
for s, e in cls._get_segment_frame_ranges(anns)
]
frame_ranges_by_text[text_val] = ranges

yield NDVideoText.from_video_text_group(
annotation_group, frame_ranges_by_text, label.data
)
continue

segment_frame_ranges = cls._get_segment_frame_ranges(
annotation_group
)
frames_data = []
for frames in segment_frame_ranges:
frames_data.append({"start": frames[0], "end": frames[-1]})
annotation.extra.update({"frames": frames_data})
yield NDClassification.from_common(annotation, label.data)

elif isinstance(annotation_group[0], VideoObjectAnnotation):
segment_frame_ranges = cls._get_segment_frame_ranges(
annotation_group
)
segments = []
for start_frame, end_frame in segment_frame_ranges:
segment = []
Expand Down
87 changes: 87 additions & 0 deletions libs/labelbox/tests/data/serialization/ndjson/test_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,93 @@ def test_video_classification_global_subclassifications():
assert res == [expected_first_annotation, expected_second_annotation]


def test_video_classification_text_produces_ndjson_with_frames():
    """Two distinct Text answers on one feature serialize as a single row.

    The row's ``answer`` is a list of {value, frames} entries, one per
    distinct text, each carrying the frame range its keyframes span.
    """
    first_text = "Looks like a hungry big cat"
    second_text = "It's getting closer!"
    # (frame, segment_index, text) for every keyframe in the label.
    keyframes = [
        (9, 0, first_text),
        (15, 0, first_text),
        (40, 1, second_text),
        (50, 1, second_text),
    ]
    label = Label(
        data=GenericDataRowData(global_key="sample-video-text"),
        annotations=[
            VideoClassificationAnnotation(
                name="free_text",
                frame=frame,
                segment_index=segment_index,
                value=Text(answer=text),
            )
            for frame, segment_index, text in keyframes
        ],
    )

    serialized = list(NDJsonConverter.serialize([label]))
    free_text_rows = [
        entry for entry in serialized if entry.get("name") == "free_text"
    ]
    assert len(free_text_rows) == 1

    (row,) = free_text_rows
    assert row["dataRow"] == {"globalKey": "sample-video-text"}
    assert "answer" in row
    answer = row["answer"]
    assert isinstance(answer, list)
    assert len(answer) == 2

    by_value = {item["value"]: item for item in answer}
    assert first_text in by_value
    assert second_text in by_value
    assert by_value[first_text]["frames"] == [{"start": 9, "end": 15}]
    assert by_value[second_text]["frames"] == [{"start": 40, "end": 50}]


def test_video_classification_text_single_text_across_frames():
    """A single Text answer repeated over keyframes yields one answer entry."""
    feature_name = "free_text_per_frame"
    label = Label(
        data=GenericDataRowData(global_key="sample-video-single-text"),
        annotations=[
            VideoClassificationAnnotation(
                name=feature_name,
                frame=frame,
                segment_index=0,
                value=Text(answer="sample text"),
            )
            for frame in (9, 15)
        ],
    )

    serialized = list(NDJsonConverter.serialize([label]))
    free_text_rows = [
        entry for entry in serialized if entry.get("name") == feature_name
    ]
    assert len(free_text_rows) == 1

    (row,) = free_text_rows
    assert row["dataRow"] == {"globalKey": "sample-video-single-text"}
    answer = row["answer"]
    assert isinstance(answer, list)
    assert len(answer) == 1

    only_entry = answer[0]
    assert only_entry["value"] == "sample text"
    assert only_entry["frames"] == [{"start": 9, "end": 15}]


def test_video_classification_nesting_bbox():
bbox_annotation = [
VideoObjectAnnotation(
Expand Down
Loading