Skip to content

Commit 978e389

Browse files
authored
Merge pull request #367 from clamsproject/366-pydantic-describe
workflow descriptors are now pydantic-based
2 parents a6db461 + 9ee0bd5 commit 978e389

8 files changed

Lines changed: 765 additions & 425 deletions

File tree

build-tools/requirements.docs.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1-
sphinx>=7.0,<8.0
1+
sphinx
22
furo
33
m2r2
4+
autodoc-pydantic

documentation/conf.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
'undoc-members': True,
3434
'show-inheritance': True,
3535
}
36+
autodoc_member_order = 'bysource'
3637

3738

3839
# -- Project information -----------------------------------------------------
@@ -55,8 +56,16 @@
5556
'sphinx.ext.autodoc',
5657
'sphinx.ext.linkcode',
5758
'm2r2',
59+
'sphinxcontrib.autodoc_pydantic',
5860
]
5961

62+
autodoc_pydantic_model_show_json = True
63+
autodoc_pydantic_model_show_field_summary = True
64+
autodoc_pydantic_model_show_config_summary = False
65+
autodoc_pydantic_model_show_validator_members = False
66+
autodoc_pydantic_model_show_validator_summary = False
67+
autodoc_pydantic_field_list_validators = False
68+
6069
templates_path = ['_templates']
6170
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
6271
# dynamically generated files

mmif/utils/cli/__init__.py

Lines changed: 118 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,19 @@
66
import io
77
import os
88
import sys
9-
from typing import Iterator, Optional, TextIO, cast
9+
from typing import Iterator, Optional, TextIO, Type, Union, cast, get_args, get_origin
10+
11+
from pydantic import BaseModel
1012

1113

1214
@contextlib.contextmanager
13-
def open_cli_io_arg(path_or_dash: Optional[str],
14-
mode: str = 'r',
15-
encoding: Optional[str] = None,
16-
errors: Optional[str] = None,
17-
default_stdin: bool = False,
18-
) -> Iterator[TextIO]:
15+
def open_cli_io_arg(
16+
path_or_dash: Optional[str],
17+
mode: str = "r",
18+
encoding: Optional[str] = None,
19+
errors: Optional[str] = None,
20+
default_stdin: bool = False,
21+
) -> Iterator[TextIO]:
1922
"""
2023
Context manager for opening files with stdin/stdout support.
2124
@@ -28,6 +31,7 @@ def open_cli_io_arg(path_or_dash: Optional[str],
2831
manager.
2932
3033
Handles the common CLI pattern where:
34+
3135
- '-' means stdin (read mode) or stdout (write mode)
3236
- None means "argument not provided"; when default_stdin=True, it falls back
3337
to stdin/stdout
@@ -54,10 +58,10 @@ def open_cli_io_arg(path_or_dash: Optional[str],
5458
f.write(content)
5559
"""
5660
# Valid text modes for file operations
57-
_READ_FLAGS = frozenset({'r', '+'})
58-
_WRITE_FLAGS = frozenset({'w', 'a', 'x', '+'})
61+
_READ_FLAGS = frozenset({"r", "+"})
62+
_WRITE_FLAGS = frozenset({"w", "a", "x", "+"})
5963

60-
if 'b' in mode:
64+
if "b" in mode:
6165
raise ValueError(
6266
f"Binary mode '{mode}' is not supported. "
6367
"Use text modes ('r', 'w', 'a', 'x') instead."
@@ -66,9 +70,7 @@ def open_cli_io_arg(path_or_dash: Optional[str],
6670
needs_read = bool(set(mode) & _READ_FLAGS)
6771
needs_write = bool(set(mode) & _WRITE_FLAGS)
6872

69-
should_use_stdio = path_or_dash == '-' or (
70-
path_or_dash is None and default_stdin
71-
)
73+
should_use_stdio = path_or_dash == "-" or (path_or_dash is None and default_stdin)
7274

7375
file_handle: Optional[TextIO] = None
7476
should_close = False
@@ -83,11 +85,7 @@ def open_cli_io_arg(path_or_dash: Optional[str],
8385

8486
if needs_read:
8587
# Check for missing input when stdin is a terminal
86-
if (
87-
path_or_dash is None
88-
and default_stdin
89-
and sys.stdin.isatty()
90-
):
88+
if path_or_dash is None and default_stdin and sys.stdin.isatty():
9189
raise SystemExit("error: No input provided.")
9290
file_handle = sys.stdin
9391

@@ -96,14 +94,15 @@ def open_cli_io_arg(path_or_dash: Optional[str],
9694

9795
else:
9896
raise ValueError(
99-
f"Mode '{mode}' not supported with stdin/stdout "
100-
"(use 'r' or 'w')"
97+
f"Mode '{mode}' not supported with stdin/stdout (use 'r' or 'w')"
10198
)
10299

103100
elif isinstance(path_or_dash, str):
104101
if needs_read and not os.path.exists(path_or_dash):
105102
raise FileNotFoundError(f"Input path does not exist: {path_or_dash}")
106-
file_handle = cast(TextIO, io.open(path_or_dash, mode, encoding=encoding, errors=errors))
103+
file_handle = cast(
104+
TextIO, io.open(path_or_dash, mode, encoding=encoding, errors=errors)
105+
)
107106
should_close = True
108107

109108
elif path_or_dash is None:
@@ -117,13 +116,110 @@ def open_cli_io_arg(path_or_dash: Optional[str],
117116
"Expected str or None."
118117
)
119118

120-
yield file_handle
119+
if file_handle is not None:
120+
yield file_handle
121121

122122
finally:
123123
if should_close and file_handle is not None:
124124
file_handle.close()
125125

126126

127+
def _unwrap_optional(annotation):
    """
    Strip a single ``Optional`` wrapper from a type annotation.

    Both spellings are recognized: ``typing.Optional[T]`` /
    ``typing.Union[T, None]`` and the PEP 604 form ``T | None``.

    :param annotation: the type annotation to inspect
    :return: ``(inner, True)`` when the annotation is a union of exactly one
             type and ``None``; ``(annotation, False)`` otherwise (including
             unions with several non-``None`` members)
    """
    # Local import: the module-level import block is not part of this block.
    import types

    # On Python 3.10-3.13 ``get_origin(int | None)`` is ``types.UnionType``,
    # not ``typing.Union``; older interpreters have no ``UnionType`` at all.
    union_origins = (Union, getattr(types, "UnionType", Union))

    if get_origin(annotation) in union_origins:
        members = get_args(annotation)
        if type(None) in members:
            non_none = [m for m in members if m is not type(None)]
            if len(non_none) == 1:
                return non_none[0], True
    return annotation, False


def _format_field_type(annotation) -> str:
    """
    Render a type annotation as a short, human-readable label.

    :param annotation: the type annotation of a model field
    :return: ``"<T>, optional"`` for optional types, ``"[<T>]"`` for
             parameterized lists, ``"obj"`` for dicts and pydantic models,
             and otherwise the bare type name without its module prefix
    """
    inner, is_optional = _unwrap_optional(annotation)
    if is_optional:
        return f"{_format_field_type(inner)}, optional"

    origin = get_origin(annotation)
    args = get_args(annotation)

    if origin is list:
        return f"[{_format_field_type(args[0])}]" if args else "[]"

    if origin is dict:
        return "obj"

    if isinstance(annotation, type) and issubclass(annotation, BaseModel):
        return "obj"

    # Fall back to the string form, trimmed of the "<class '...'>" wrapper,
    # the "typing." prefix and any remaining module path.
    label = str(annotation)
    if label.startswith("<class '"):
        label = label[8:-2]
    if label.startswith("typing."):
        label = label[7:]
    if "." in label:
        label = label.split(".")[-1]
    return label


def _find_nested_model(annotation):
    """
    Find the pydantic model (if any) that a field annotation refers to.

    Looks through one ``Optional`` wrapper, then accepts the model itself,
    a list of models, or a dict whose values are models.

    :param annotation: the type annotation of a model field
    :return: the nested ``BaseModel`` subclass, or ``None`` if there is none
    """
    candidate, _ = _unwrap_optional(annotation)

    if isinstance(candidate, type) and issubclass(candidate, BaseModel):
        return candidate

    origin = get_origin(candidate)
    args = get_args(candidate)

    if origin is list and args:
        element = args[0]
    elif origin is dict and len(args) > 1:
        element = args[1]
    else:
        return None

    if isinstance(element, type) and issubclass(element, BaseModel):
        return element
    return None


def _summarize_model(model, indent, ancestors) -> str:
    """
    Recursive worker behind :func:`generate_model_summary`.

    :param model: the pydantic model class to summarize
    :param indent: number of spaces to put in front of every line
    :param ancestors: tuple of model classes currently being expanded further
                      up the recursion; used to stop on self-referential
                      models instead of recursing without bound
    :return: the summary text, or an empty string when ``model`` is already
             being expanded by a caller
    """
    if model in ancestors:
        return ""

    prefix = " " * indent
    lines = []

    # model_fields maps field names to pydantic FieldInfo objects
    for name, field in model.model_fields.items():
        # Prefer the alias, since that is the key used in serialized output
        field_name = field.alias if field.alias else name

        entry = f"{prefix}- {field_name} ({_format_field_type(field.annotation)})"
        if field.description:
            entry += f": {field.description}"
        lines.append(entry)

        nested_model = _find_nested_model(field.annotation)
        if nested_model is not None:
            nested = _summarize_model(
                nested_model, indent + 4, ancestors + (model,)
            )
            # Skip empty summaries so no blank line is emitted
            if nested:
                lines.append(nested)

    return "\n".join(lines)


def generate_model_summary(model: Type[BaseModel], indent: int = 0) -> str:
    """
    Build an indented, bulleted plain-text outline of a pydantic model.

    Each field becomes one line of the form
    ``- <name> (<type>): <description>``, where the name is the field alias
    when one is set and the description part is omitted when empty. Fields
    that hold another pydantic model (directly, optionally, as list items or
    as dict values) are followed by that model's own outline, indented four
    more spaces.

    :param model: the pydantic model class to summarize
    :param indent: number of spaces to put in front of every top-level line
    :return: the outline as a single newline-joined string
    """
    return _summarize_model(model, indent, ())
221+
222+
127223
# keep imports of CLI modules for historical reasons
128224
# keep them here in the bottom to avoid circular imports
129225
from mmif.utils.cli import rewind

mmif/utils/cli/describe.py

Lines changed: 44 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,20 @@
11
import argparse
22
import json
3-
import os
43
import sys
54
import textwrap
65
from pathlib import Path
76
from typing import Union, cast
87

9-
from mmif.utils.cli import open_cli_io_arg
10-
from mmif.utils.workflow_helper import generate_workflow_identifier, describe_single_mmif, \
11-
describe_mmif_collection
8+
from mmif.utils.cli import open_cli_io_arg, generate_model_summary
9+
1210
# gen_param_hash is imported for backward compatibility
13-
from mmif.utils.workflow_helper import generate_param_hash
11+
from mmif.utils.workflow_helper import (
12+
CollectionMmifDesc,
13+
SingleMmifDesc,
14+
describe_mmif_collection,
15+
describe_single_mmif,
16+
generate_workflow_identifier,
17+
)
1418

1519

1620
def get_pipeline_specs(mmif_file: Union[str, Path]):
@@ -33,30 +37,19 @@ def describe_argparser():
3337
'collection of MMIF files.'
3438
)
3539

36-
# get and clean docstrings
37-
def _extract_describe_docstring(func):
38-
doc = func.__doc__.split(':param')[0]
39-
# then cut off all lines after `---`
40-
doc = doc.split('---')[0]
41-
return textwrap.dedent(doc).strip()
42-
43-
single_doc = _extract_describe_docstring(describe_single_mmif)
44-
collection_doc = _extract_describe_docstring(describe_mmif_collection)
45-
4640
additional = textwrap.dedent(f"""
4741
This command extracts workflow information from a single MMIF file or
48-
summarizes a directory of MMIF files. The output is serialized as JSON and
49-
includes:
42+
a directory of MMIF files. The output is serialized as JSON.
43+
44+
Output Schemas:
5045
51-
=========================
52-
Single MMIF file as input
53-
=========================
54-
{single_doc}
55-
56-
==================================
57-
A directory of MMIF files as input
58-
==================================
59-
{collection_doc}
46+
1. Single MMIF File (mmif-file):
47+
{generate_model_summary(SingleMmifDesc, indent=4)}
48+
49+
2. MMIF Collection (mmif-dir):
50+
{generate_model_summary(CollectionMmifDesc, indent=4)}
51+
52+
Use `--help-schema` to inspect the full JSON schema for a specific output type.
6053
""")
6154
return oneliner, additional
6255

@@ -67,6 +60,7 @@ def prep_argparser(**kwargs):
6760
formatter_class=argparse.RawDescriptionHelpFormatter,
6861
**kwargs
6962
)
63+
7064
parser.add_argument(
7165
"MMIF_FILE",
7266
nargs="?",
@@ -84,24 +78,37 @@ def prep_argparser(**kwargs):
8478
action="store_true",
8579
help="Pretty-print JSON output"
8680
)
81+
parser.add_argument(
82+
"--help-schema",
83+
nargs=1,
84+
choices=["mmif-file", "mmif-dir"],
85+
metavar="SCHEMA_NAME",
86+
help="Print the JSON schema for the output. Options: mmif-file, mmif-dir."
87+
)
8788
return parser
8889

8990

9091
def main(args):
9192
"""
92-
Main entry point for the describe CLI command.
93-
94-
Reads a MMIF file and outputs a JSON summary containing:
95-
96-
- workflow_id: unique identifier for the source and app sequence
97-
- stats: view counts, annotation counts (total/per-view/per-type), and lists of error/warning/empty view IDs
98-
- views: map of view IDs to app configurations and profiling data
99-
100-
:param args: Parsed command-line arguments
93+
Main block for the describe CLI command.
94+
This function basically works as a wrapper around
95+
:func:`describe_single_mmif` (for single file input) or
96+
:func:`describe_mmif_collection` (for directory input).
10197
"""
98+
if hasattr(args, 'help_schema') and args.help_schema is not None:
99+
schema_name = args.help_schema[0]
100+
if schema_name == 'mmif-file':
101+
model_cls = SingleMmifDesc
102+
elif schema_name == 'mmif-dir':
103+
model_cls = CollectionMmifDesc
104+
105+
schema = model_cls.model_json_schema()
106+
print(json.dumps(schema, indent=2))
107+
sys.exit(0)
108+
102109
output = {}
103110
# if input is a directory
104-
if isinstance(args.MMIF_FILE, (str, os.PathLike)) and Path(args.MMIF_FILE).is_dir():
111+
if Path(str(args.MMIF_FILE)).is_dir():
105112
output = describe_mmif_collection(args.MMIF_FILE)
106113
# if input is a file or stdin
107114
else:
@@ -125,6 +132,7 @@ def main(args):
125132
tmp_path.unlink()
126133

127134
if output:
135+
# Convert Pydantic models to dicts
128136
with open_cli_io_arg(args.output, 'w', default_stdin=True) as output_file:
129137
json.dump(output, output_file, indent=2 if args.pretty else None)
130138
output_file.write('\n')

0 commit comments

Comments
 (0)