Skip to content

Commit 229508d

Browse files
committed
added human-friendly summary for pydantic classes in describe --help
1 parent 8266a2e commit 229508d

2 files changed

Lines changed: 138 additions & 50 deletions

File tree

mmif/utils/cli/__init__.py

Lines changed: 115 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,19 @@
66
import io
77
import os
88
import sys
9-
from typing import Iterator, Optional, TextIO, cast
9+
from typing import Iterator, Optional, TextIO, Type, Union, cast, get_args, get_origin
10+
11+
from pydantic import BaseModel
1012

1113

1214
@contextlib.contextmanager
13-
def open_cli_io_arg(path_or_dash: Optional[str],
14-
mode: str = 'r',
15-
encoding: Optional[str] = None,
16-
errors: Optional[str] = None,
17-
default_stdin: bool = False,
18-
) -> Iterator[TextIO]:
15+
def open_cli_io_arg(
16+
path_or_dash: Optional[str],
17+
mode: str = "r",
18+
encoding: Optional[str] = None,
19+
errors: Optional[str] = None,
20+
default_stdin: bool = False,
21+
) -> Iterator[TextIO]:
1922
"""
2023
Context manager for opening files with stdin/stdout support.
2124
@@ -55,10 +58,10 @@ def open_cli_io_arg(path_or_dash: Optional[str],
5558
f.write(content)
5659
"""
5760
# Valid text modes for file operations
58-
_READ_FLAGS = frozenset({'r', '+'})
59-
_WRITE_FLAGS = frozenset({'w', 'a', 'x', '+'})
61+
_READ_FLAGS = frozenset({"r", "+"})
62+
_WRITE_FLAGS = frozenset({"w", "a", "x", "+"})
6063

61-
if 'b' in mode:
64+
if "b" in mode:
6265
raise ValueError(
6366
f"Binary mode '{mode}' is not supported. "
6467
"Use text modes ('r', 'w', 'a', 'x') instead."
@@ -67,9 +70,7 @@ def open_cli_io_arg(path_or_dash: Optional[str],
6770
needs_read = bool(set(mode) & _READ_FLAGS)
6871
needs_write = bool(set(mode) & _WRITE_FLAGS)
6972

70-
should_use_stdio = path_or_dash == '-' or (
71-
path_or_dash is None and default_stdin
72-
)
73+
should_use_stdio = path_or_dash == "-" or (path_or_dash is None and default_stdin)
7374

7475
file_handle: Optional[TextIO] = None
7576
should_close = False
@@ -84,11 +85,7 @@ def open_cli_io_arg(path_or_dash: Optional[str],
8485

8586
if needs_read:
8687
# Check for missing input when stdin is a terminal
87-
if (
88-
path_or_dash is None
89-
and default_stdin
90-
and sys.stdin.isatty()
91-
):
88+
if path_or_dash is None and default_stdin and sys.stdin.isatty():
9289
raise SystemExit("error: No input provided.")
9390
file_handle = sys.stdin
9491

@@ -97,14 +94,15 @@ def open_cli_io_arg(path_or_dash: Optional[str],
9794

9895
else:
9996
raise ValueError(
100-
f"Mode '{mode}' not supported with stdin/stdout "
101-
"(use 'r' or 'w')"
97+
f"Mode '{mode}' not supported with stdin/stdout (use 'r' or 'w')"
10298
)
10399

104100
elif isinstance(path_or_dash, str):
105101
if needs_read and not os.path.exists(path_or_dash):
106102
raise FileNotFoundError(f"Input path does not exist: {path_or_dash}")
107-
file_handle = cast(TextIO, io.open(path_or_dash, mode, encoding=encoding, errors=errors))
103+
file_handle = cast(
104+
TextIO, io.open(path_or_dash, mode, encoding=encoding, errors=errors)
105+
)
108106
should_close = True
109107

110108
elif path_or_dash is None:
@@ -126,6 +124,102 @@ def open_cli_io_arg(path_or_dash: Optional[str],
126124
file_handle.close()
127125

128126

127+
def _is_union_origin(origin) -> bool:
    """Return True if *origin* is ``typing.Union`` or a PEP 604 ``X | Y`` union."""
    import types  # local import so this helper does not depend on file-level imports

    # ``types.UnionType`` only exists on Python 3.10+; on older interpreters
    # the lookup yields None and the second comparison is skipped.
    pep604_union = getattr(types, "UnionType", None)
    return origin is Union or (pep604_union is not None and origin is pep604_union)


def _unwrap_optional(annotation):
    """Strip one ``Optional[...]`` layer from *annotation*.

    :returns: ``(inner, True)`` when *annotation* is a union of exactly one
        type with ``None``; ``(annotation, False)`` otherwise (including
        unions with several non-None members, which are left untouched).
    """
    args = get_args(annotation)
    if _is_union_origin(get_origin(annotation)) and type(None) in args:
        non_none = [arg for arg in args if arg is not type(None)]
        if len(non_none) == 1:
            return non_none[0], True
    return annotation, False


def _is_model_class(candidate) -> bool:
    """Return True if *candidate* is a class derived from ``BaseModel``."""
    try:
        return isinstance(candidate, type) and issubclass(candidate, BaseModel)
    except TypeError:
        # NOTE(review): on some Python versions subscripted builtin generics
        # (e.g. ``tuple[int]``) pass ``isinstance(..., type)`` but are rejected
        # by ``issubclass``; treat them as "not a model" instead of crashing.
        return False


def _format_type(annotation) -> str:
    """Render a field annotation as a short, human-friendly type label.

    ``Optional[T]`` becomes ``"<T>, optional"``, ``List[T]`` becomes
    ``"[<T>]"``, dicts and pydantic models become ``"obj"``; anything else is
    reduced to its bare name with ``typing.``/module prefixes removed.
    """
    inner, is_optional = _unwrap_optional(annotation)
    if is_optional:
        return f"{_format_type(inner)}, optional"

    origin = get_origin(annotation)
    args = get_args(annotation)

    if origin is list:
        return f"[{_format_type(args[0])}]" if args else "[]"

    if origin is dict or _is_model_class(annotation):
        return "obj"

    # Fall back to the textual representation and trim it down to a bare name.
    text = str(annotation)
    if text.startswith("<class '"):
        text = text[8:-2]  # drop the leading "<class '" and trailing "'>"
    if text.startswith("typing."):
        text = text[7:]
    if "." in text:
        text = text.split(".")[-1]
    return text


def _find_nested_model(annotation):
    """Return the pydantic model referenced by *annotation*, or None.

    Looks through a single ``Optional`` wrapper, then accepts the model
    itself, the element type of a list, or the value type of a dict.
    """
    candidate, _ = _unwrap_optional(annotation)
    if _is_model_class(candidate):
        return candidate

    origin = get_origin(candidate)
    args = get_args(candidate)
    if origin is list and args and _is_model_class(args[0]):
        return args[0]
    if origin is dict and len(args) > 1 and _is_model_class(args[1]):
        return args[1]
    return None


def _summarize_model(model, indent: int, ancestors: frozenset) -> str:
    """Recursive worker for :func:`generate_model_summary`.

    *ancestors* holds the models currently being expanded on the path from
    the root down to *model*; it is what stops self-referential models from
    recursing forever.
    """
    prefix = " " * indent
    path = ancestors | {model}
    lines = []

    # model_fields maps field names to FieldInfo objects
    for name, field in model.model_fields.items():
        # Prefer the alias, since that is the key used in serialized output
        field_name = field.alias if field.alias else name
        annotation = field.annotation

        entry = f"{prefix}- {field_name} ({_format_type(annotation)})"
        if field.description:
            entry += f": {field.description}"
        lines.append(entry)

        nested_model = _find_nested_model(annotation)
        # Only models already on the current path are skipped, so a model
        # reused by two sibling fields is still expanded under each of them.
        if nested_model is not None and nested_model not in path:
            lines.append(_summarize_model(nested_model, indent + 4, path))

    return "\n".join(lines)


def generate_model_summary(model: Type[BaseModel], indent: int = 0) -> str:
    """
    Build a human-friendly, indented outline of a pydantic model's fields.

    Each field is rendered on its own line as
    ``- <alias-or-name> (<type>)``, followed by ``: <description>`` when the
    field has a description. Fields whose type is (or contains, via
    ``Optional``, ``List`` or ``Dict`` values) another pydantic model are
    followed by that model's own outline, indented four more spaces.
    A model that refers back to itself, directly or through other models,
    is listed but not expanded again, so recursive schemas terminate.

    :param model: the pydantic model class to summarize
    :param indent: number of spaces to put in front of every top-level line
    :returns: the outline as a single newline-joined string (empty string
        when the model has no fields)
    """
    return _summarize_model(model, indent, frozenset())
221+
222+
129223
# keep imports of CLI modules for historical reasons
130224
# keep them here in the bottom to avoid circular imports
131225
from mmif.utils.cli import rewind

mmif/utils/cli/describe.py

Lines changed: 23 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,9 @@
33
import sys
44
import textwrap
55
from pathlib import Path
6-
from typing import Dict, Type, Union, cast
6+
from typing import Union, cast
77

8-
from pydantic import BaseModel
9-
10-
from mmif.utils.cli import open_cli_io_arg
8+
from mmif.utils.cli import open_cli_io_arg, generate_model_summary
119

1210
# gen_param_hash is imported for backward compatibility
1311
from mmif.utils.workflow_helper import (
@@ -18,12 +16,6 @@
1816
generate_workflow_identifier,
1917
)
2018

21-
models_to_help = [SingleMmifDesc, CollectionMmifDesc]
22-
model_modules = set(model.__module__ for model in models_to_help)
23-
def get_all_models() -> Dict[str, Type[BaseModel]]:
24-
return {
25-
name: cls for name, cls in models_to_help
26-
}
2719

2820
def get_pipeline_specs(mmif_file: Union[str, Path]):
2921
import warnings
@@ -49,7 +41,15 @@ def describe_argparser():
4941
This command extracts workflow information from a single MMIF file or
5042
a directory of MMIF files. The output is serialized as JSON.
5143
52-
Use `--help-schemas` to inspect the structure of the JSON output.
44+
Output Schemas:
45+
46+
1. Single MMIF File (mmif-file):
47+
{generate_model_summary(SingleMmifDesc, indent=4)}
48+
49+
2. MMIF Collection (mmif-dir):
50+
{generate_model_summary(CollectionMmifDesc, indent=4)}
51+
52+
Use `--help-schema` to inspect the full JSON schema for a specific output type.
5353
""")
5454
return oneliner, additional
5555

@@ -79,13 +79,11 @@ def prep_argparser(**kwargs):
7979
help="Pretty-print JSON output"
8080
)
8181
parser.add_argument(
82-
"--help-schemas",
83-
nargs="*",
84-
choices=["all"] + [m.__name__ for m in models_to_help],
82+
"--help-schema",
83+
nargs=1,
84+
choices=["mmif-file", "mmif-dir"],
8585
metavar="SCHEMA_NAME",
86-
help=f"Print the JSON schema for the output. For human-readable documentation, "
87-
f"visit https://clams.ai/mmif-python and see the following modules: "
88-
f"{', '.join(model_modules)}.\nOptions: all, {', '.join([m.__name__ for m in models_to_help])}."
86+
help="Print the JSON schema for the output. Options: mmif-file, mmif-dir."
8987
)
9088
return parser
9189

@@ -97,19 +95,15 @@ def main(args):
9795
:func:`describe_single_mmif` (for single file input) or
9896
:func:`describe_mmif_collection` (for directory input).
9997
"""
100-
if hasattr(args, 'help_schemas') and args.help_schemas is not None:
101-
models_map = {m.__name__: m for m in models_to_help}
102-
to_show = []
103-
if len(args.help_schemas) == 0 or 'all' in args.help_schemas:
104-
to_show = [m.__name__ for m in models_to_help]
105-
else:
106-
to_show = args.help_schemas
98+
if hasattr(args, 'help_schema') and args.help_schema is not None:
99+
schema_name = args.help_schema[0]
100+
if schema_name == 'mmif-file':
101+
model_cls = SingleMmifDesc
102+
elif schema_name == 'mmif-dir':
103+
model_cls = CollectionMmifDesc
107104

108-
for name in to_show:
109-
model_cls = models_map[name]
110-
schema = model_cls.model_json_schema()
111-
print(json.dumps(schema, indent=2))
112-
print()
105+
schema = model_cls.model_json_schema()
106+
print(json.dumps(schema, indent=2))
113107
sys.exit(0)
114108

115109
output = {}

0 commit comments

Comments
 (0)