Skip to content

Commit 59fdca5

Browse files
authored
fix(cli): harden extension registration and discovery workflows (#2499)
* chore: update community catalog with latest extension versions
  - Update memory-md from 0.7.9 to 0.8.0
  - Update architecture-guard from 1.6.7 to 1.8.0
* fix(cli): harden extension registration with project-level tracking in extensions.yml
* test(cli): add comprehensive unit tests for extension registration logic
* chore: remove out-of-scope catalog changes
* refactor: address PR feedback for extension registration hardening
* fix: harden extension registration defensive logic and add comprehensive unregister_hooks tests
  - Add dict guard to register_hooks() to handle corrupted extensions.yml (non-dict root)
  - Add 5 comprehensive tests for unregister_hooks() workflow:
    * Full workflow with hooks + installed list removal
    * Resilience when config has no 'hooks' key
    * Corrupted YAML handling
    * Multiple extension scenarios
    * All 11 tests passing
* fix: sanitize installed to strings, guard unregister_hooks dict, handle null hook values
  - register_extension(): filter non-string entries from installed before sort
  - register_hooks(): normalize hooks to {} when missing or not a dict
  - unregister_hooks(): add isinstance(config, dict) guard before key checks
  - unregister_hooks(): coerce null/scalar hook lists to [] before iteration
  - tests: add 3 regression tests for no-hooks manifest, mixed-type installed, null hook values
  - All 14 tests passing
* fix(cli): persist sanitization results and harden hook registration
* Harden extension registration to always persist sanitization results
* Hardening extension registration: support mapping entries, improve persistence, and fix update rollback
* fix(cli): harden extension update and unregistration workflows
* fix(cli): move update sentinels outside try block to prevent NameError on rollback
* fix(cli): sanitize hook event lists in register_hooks to prevent crashes
* fix(cli): deduplicate hook entries and harden rollback hooks-restore guards
* test(cli): add regression tests for extension update and rollback hardening
* fix(cli): deduplicate installed list by id in register_extension
* fix(cli): consolidate and harden extension update rollback logic
* fix(cli): initialize backup_registry_entry before try block to prevent UnboundLocalError on rollback
* fix(tests): return Path from download_extension mock and add Path import
* fix(cli): normalize get_project_config() return to dict; deduplicate in unregister_extension()
* fix(cli): normalize hooks/installed/settings in get_project_config(); use tmp_path-scoped zip in tests
* fix(cli): set modified=True on hook coercion in rollback; sanitize hook event values in get_project_config(); harden test assertions
* fix(cli): filter non-dict hook entries in get_project_config(); remove dead MISSING sentinel
* fix(cli): gate extensions.yml rollback on backup_hooks is not None; update stale comment
* fix(cli): move _AgentReg import outside try block; assert result.exception is None in tests
* fix(extensions): consistent key order in default config; deep-copy backup_installed
* test: fix misleading comment; assert exit_code==1 in rollback test
* test: clean up duplicate imports in hardening tests
* refactor(extensions): extract _sanitize_installed_list helper; strengthen hook unregister assertion
* fix(extensions): validate extension IDs in _sanitize_installed_list; clarify test comment
1 parent 2fb9d3b commit 59fdca5

4 files changed

Lines changed: 846 additions & 60 deletions

File tree

src/specify_cli/__init__.py

Lines changed: 63 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -4295,6 +4295,10 @@ def extension_update(
42954295
failed_updates = []
42964296
registrar = CommandRegistrar()
42974297
hook_executor = HookExecutor(project_root)
4298+
from .agents import CommandRegistrar as _AgentReg # used in backup and rollback paths
4299+
4300+
# UNSET sentinel: backup not yet captured (exception before backup step)
4301+
UNSET = object()
42984302

42994303
for update in updates_available:
43004304
extension_id = update["id"]
@@ -4308,8 +4312,9 @@ def extension_update(
43084312
backup_config_dir = backup_base / "config"
43094313

43104314
# Store backup state
4311-
backup_registry_entry = None
4312-
backup_hooks = None # None means no hooks key in config; {} means hooks key existed
4315+
backup_registry_entry = None # None means registry entry not yet captured
4316+
backup_installed = UNSET # Original installed list from extensions.yml
4317+
backup_hooks = None # None means backup step 4 not yet reached; {} or {...} means backup was captured
43134318
backed_up_command_files = {}
43144319

43154320
try:
@@ -4334,8 +4339,7 @@ def extension_update(
43344339
shutil.copy2(cfg_file, backup_config_dir / cfg_file.name)
43354340

43364341
# 3. Backup command files for all agents
4337-
from .agents import CommandRegistrar as _AgentReg
4338-
registered_commands = backup_registry_entry.get("registered_commands", {})
4342+
registered_commands = backup_registry_entry.get("registered_commands", {}) if isinstance(backup_registry_entry, dict) else {}
43394343
for agent_name, cmd_names in registered_commands.items():
43404344
if agent_name not in registrar.AGENT_CONFIGS:
43414345
continue
@@ -4360,14 +4364,20 @@ def extension_update(
43604364
shutil.copy2(prompt_file, backup_prompt_path)
43614365
backed_up_command_files[str(prompt_file)] = str(backup_prompt_path)
43624366

4363-
# 4. Backup hooks from extensions.yml
4364-
# Use backup_hooks=None to indicate config had no "hooks" key (don't create on restore)
4365-
# Use backup_hooks={} to indicate config had "hooks" key with no hooks for this extension
4367+
# 4. Backup hooks and installed list from extensions.yml
4368+
# get_project_config() always normalizes installed->[] and hooks->{},
4369+
# so no sentinel is needed to distinguish key-absent from key-empty.
43664370
config = hook_executor.get_project_config()
4367-
if "hooks" in config:
4368-
backup_hooks = {} # Config has hooks key - preserve this fact
4369-
for hook_name, hook_list in config["hooks"].items():
4370-
ext_hooks = [h for h in hook_list if h.get("extension") == extension_id]
4371+
if isinstance(config, dict):
4372+
import copy
4373+
# Deep-copy so nested mapping entries (e.g. version-pin dicts)
4374+
# are not affected by in-place mutations during the update.
4375+
backup_installed = copy.deepcopy(config.get("installed", []))
4376+
backup_hooks = {}
4377+
for hook_name, hook_list in config.get("hooks", {}).items():
4378+
if not isinstance(hook_list, list):
4379+
continue
4380+
ext_hooks = [h for h in hook_list if isinstance(h, dict) and h.get("extension") == extension_id]
43714381
if ext_hooks:
43724382
backup_hooks[hook_name] = ext_hooks
43734383

@@ -4520,35 +4530,51 @@ def extension_update(
45204530
original_file.parent.mkdir(parents=True, exist_ok=True)
45214531
shutil.copy2(backup_file, original_file)
45224532

4523-
# Restore hooks in extensions.yml
4524-
# - backup_hooks=None means original config had no "hooks" key
4525-
# - backup_hooks={} or {...} means config had hooks key
4526-
config = hook_executor.get_project_config()
4527-
if "hooks" in config:
4533+
# Restore metadata in extensions.yml (hooks and installed list).
4534+
# Only run if backup step 4 was reached (backup_hooks is not None);
4535+
# otherwise we have no safe baseline to restore from and could corrupt
4536+
# the config by removing pre-existing hooks.
4537+
if backup_hooks is not None:
4538+
config = hook_executor.get_project_config()
4539+
if not isinstance(config, dict):
4540+
config = {}
4541+
45284542
modified = False
45294543

4530-
if backup_hooks is None:
4531-
# Original config had no "hooks" key; remove it entirely
4532-
del config["hooks"]
4544+
# 1. Restore hooks in extensions.yml
4545+
if not isinstance(config.get("hooks"), dict):
4546+
config["hooks"] = {}
45334547
modified = True
4534-
else:
4535-
# Remove any hooks for this extension added by failed install
4536-
for hook_name, hooks_list in config["hooks"].items():
4537-
original_len = len(hooks_list)
4538-
config["hooks"][hook_name] = [
4539-
h for h in hooks_list
4540-
if h.get("extension") != extension_id
4541-
]
4542-
if len(config["hooks"][hook_name]) != original_len:
4543-
modified = True
4544-
4545-
# Add back the backed up hooks if any
4546-
if backup_hooks:
4547-
for hook_name, hooks in backup_hooks.items():
4548-
if hook_name not in config["hooks"]:
4549-
config["hooks"][hook_name] = []
4550-
config["hooks"][hook_name].extend(hooks)
4551-
modified = True
4548+
4549+
# Remove any hooks for this extension added by the failed install
4550+
for hook_name in list(config["hooks"].keys()):
4551+
hooks_list = config["hooks"][hook_name]
4552+
if not isinstance(hooks_list, list):
4553+
config["hooks"][hook_name] = []
4554+
modified = True
4555+
continue
4556+
4557+
original_len = len(hooks_list)
4558+
config["hooks"][hook_name] = [
4559+
h for h in hooks_list
4560+
if isinstance(h, dict) and h.get("extension") != extension_id
4561+
]
4562+
if len(config["hooks"][hook_name]) != original_len:
4563+
modified = True
4564+
4565+
# Add back the backed-up hooks
4566+
if backup_hooks:
4567+
for hook_name, hooks in backup_hooks.items():
4568+
if not isinstance(config["hooks"].get(hook_name), list):
4569+
config["hooks"][hook_name] = []
4570+
config["hooks"][hook_name].extend(hooks)
4571+
modified = True
4572+
4573+
# 2. Restore installed list in extensions.yml
4574+
if backup_installed is not UNSET:
4575+
if config.get("installed") != backup_installed:
4576+
config["installed"] = backup_installed
4577+
modified = True
45524578

45534579
if modified:
45544580
hook_executor.save_project_config(config)

src/specify_cli/extensions.py

Lines changed: 177 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -1190,7 +1190,7 @@ def install_from_directory(
11901190
# was used during project initialisation (feature parity).
11911191
registered_skills = self._register_extension_skills(manifest, dest_dir)
11921192

1193-
# Register hooks
1193+
# Register hooks and update installed list in extensions.yml
11941194
hook_executor = HookExecutor(self.project_root)
11951195
hook_executor.register_hooks(manifest)
11961196

@@ -2481,7 +2481,32 @@ def get_project_config(self) -> Dict[str, Any]:
24812481
}
24822482

24832483
try:
2484-
return yaml.safe_load(self.config_file.read_text(encoding="utf-8")) or {}
2484+
result = yaml.safe_load(self.config_file.read_text(encoding="utf-8"))
2485+
# Coerce non-dict root (including None for an empty file) to the
2486+
# fully-normalized default so callers always get guaranteed fields.
2487+
if not isinstance(result, dict):
2488+
return {
2489+
"installed": [],
2490+
"settings": {"auto_execute_hooks": True},
2491+
"hooks": {},
2492+
}
2493+
# Normalize nested fields so read-only callers like get_hooks_for_event()
2494+
# never see non-dict hooks or non-list installed (Feedback)
2495+
if not isinstance(result.get("hooks"), dict):
2496+
result["hooks"] = {}
2497+
if not isinstance(result.get("installed"), list):
2498+
result["installed"] = []
2499+
if not isinstance(result.get("settings"), dict):
2500+
result["settings"] = {"auto_execute_hooks": True}
2501+
# Sanitize hook event values: coerce non-list values to [] and filter
2502+
# non-dict items so get_hooks_for_event() can safely call .get() (Feedback)
2503+
for event_key in list(result["hooks"]):
2504+
event_val = result["hooks"][event_key]
2505+
if not isinstance(event_val, list):
2506+
result["hooks"][event_key] = []
2507+
else:
2508+
result["hooks"][event_key] = [h for h in event_val if isinstance(h, dict)]
2509+
return result
24852510
except (yaml.YAMLError, OSError, UnicodeError):
24862511
return {
24872512
"installed": [],
@@ -2501,25 +2526,141 @@ def save_project_config(self, config: Dict[str, Any]):
25012526
encoding="utf-8",
25022527
)
25032528

2529+
def register_extension(self, extension_id: str):
2530+
"""Add extension to the installed list in project config.
2531+
2532+
Args:
2533+
extension_id: ID of extension to register
2534+
"""
2535+
config = self.get_project_config()
2536+
2537+
# Ensure config is a dict (defensive)
2538+
if not isinstance(config, dict):
2539+
config = {}
2540+
2541+
raw_installed = config.get("installed")
2542+
sanitized = self._sanitize_installed_list(raw_installed, add_id=extension_id)
2543+
2544+
if sanitized != raw_installed:
2545+
config["installed"] = sanitized
2546+
self.save_project_config(config)
2547+
2548+
def unregister_extension(self, extension_id: str):
2549+
"""Remove extension from the installed list in project config.
2550+
2551+
Args:
2552+
extension_id: ID of extension to unregister
2553+
"""
2554+
config = self.get_project_config()
2555+
2556+
if not isinstance(config, dict):
2557+
config = {}
2558+
2559+
raw_installed = config.get("installed")
2560+
sanitized = self._sanitize_installed_list(raw_installed, remove_id=extension_id)
2561+
2562+
# Always persist if sanitized state differs from raw config (ensures normalization)
2563+
if sanitized != raw_installed:
2564+
config["installed"] = sanitized
2565+
self.save_project_config(config)
2566+
2567+
@staticmethod
2568+
def _sanitize_installed_list(
2569+
raw: object,
2570+
*,
2571+
add_id: str = "",
2572+
remove_id: str = "",
2573+
) -> list:
2574+
"""Normalize, deduplicate, and optionally add/remove an extension id.
2575+
2576+
Shared by register_extension() and unregister_extension() to prevent
2577+
the two paths from drifting.
2578+
2579+
Args:
2580+
raw: The raw value from config["installed"] (may be non-list).
2581+
add_id: If non-empty, ensure this id is present (plain-string fallback).
2582+
remove_id: If non-empty, remove this id from the list.
2583+
2584+
Returns:
2585+
A sanitized, deduplicated, alphabetically-sorted list.
2586+
"""
2587+
_VALID_ID = re.compile(r'^[a-z0-9-]+$')
2588+
2589+
installed = raw if isinstance(raw, list) else []
2590+
2591+
# Keep only entries whose resolved id is a non-empty string matching
2592+
# the extension-id format (^[a-z0-9-]+$), same rule ExtensionManifest enforces.
2593+
def _valid_entry(x: object) -> bool:
2594+
if isinstance(x, str):
2595+
return bool(_VALID_ID.match(x.strip()))
2596+
if isinstance(x, dict):
2597+
eid = x.get("id")
2598+
return isinstance(eid, str) and bool(_VALID_ID.match(eid.strip()))
2599+
return False
2600+
2601+
valid = [x for x in installed if _valid_entry(x)]
2602+
2603+
# Deduplicate by id: prefer dict (richer metadata) over plain string
2604+
seen: dict = {} # id -> entry (dict preferred over str)
2605+
for x in valid:
2606+
eid = x.strip() if isinstance(x, str) else x.get("id", "").strip()
2607+
if eid not in seen or isinstance(x, dict):
2608+
seen[eid] = x
2609+
2610+
# Validate add_id against the same regex before inserting
2611+
if add_id and _VALID_ID.match(add_id.strip()) and add_id not in seen:
2612+
seen[add_id] = add_id
2613+
2614+
if remove_id:
2615+
seen.pop(remove_id, None)
2616+
2617+
def _sort_key(x: object) -> str:
2618+
return x if isinstance(x, str) else x.get("id", "") # type: ignore[return-value]
2619+
2620+
return sorted(seen.values(), key=_sort_key)
2621+
25042622
def register_hooks(self, manifest: ExtensionManifest):
25052623
"""Register extension hooks in project config.
25062624
25072625
Args:
25082626
manifest: Extension manifest with hooks to register
25092627
"""
2628+
# Always ensure the extension is in the installed list
2629+
self.register_extension(manifest.id)
2630+
25102631
if not hasattr(manifest, "hooks") or not manifest.hooks:
25112632
return
25122633

25132634
config = self.get_project_config()
25142635

2515-
# Ensure hooks dict exists
2516-
if "hooks" not in config:
2636+
# Ensure config is a dict (defensive)
2637+
changed = False
2638+
if not isinstance(config, dict):
2639+
config = {}
2640+
changed = True
2641+
2642+
# Ensure hooks dict exists and is a mapping
2643+
if "hooks" not in config or not isinstance(config["hooks"], dict):
25172644
config["hooks"] = {}
2645+
changed = True
2646+
else:
2647+
# Sanitize existing hook lists to prevent crashes in downstream code (Feedback)
2648+
for h_name in list(config["hooks"].keys()):
2649+
h_list = config["hooks"][h_name]
2650+
if not isinstance(h_list, list):
2651+
config["hooks"][h_name] = []
2652+
changed = True
2653+
else:
2654+
sanitized_h_list = [h for h in h_list if isinstance(h, dict)]
2655+
if len(sanitized_h_list) != len(h_list):
2656+
config["hooks"][h_name] = sanitized_h_list
2657+
changed = True
25182658

25192659
# Register each hook
25202660
for hook_name, hook_config in manifest.hooks.items():
2521-
if hook_name not in config["hooks"]:
2661+
if hook_name not in config["hooks"] or not isinstance(config["hooks"][hook_name], list):
25222662
config["hooks"][hook_name] = []
2663+
changed = True
25232664

25242665
# Add hook entry
25252666
hook_entry = {
@@ -2534,40 +2675,53 @@ def register_hooks(self, manifest: ExtensionManifest):
25342675
"condition": hook_config.get("condition"),
25352676
}
25362677

2537-
# Check if already registered
2538-
existing = [
2539-
h
2540-
for h in config["hooks"][hook_name]
2541-
if h.get("extension") == manifest.id
2678+
# Deduplicate: remove all existing entries for this extension on this
2679+
# hook event, then append the single canonical entry. This prevents
2680+
# multiple hooks firing when hand-edited or older versions leave
2681+
# duplicate entries behind. (Feedback from review)
2682+
original_list = config["hooks"][hook_name]
2683+
deduped = [
2684+
h for h in original_list
2685+
if not (isinstance(h, dict) and h.get("extension") == manifest.id)
25422686
]
2687+
deduped.append(hook_entry)
2688+
if deduped != original_list:
2689+
config["hooks"][hook_name] = deduped
2690+
changed = True
25432691

2544-
if not existing:
2545-
config["hooks"][hook_name].append(hook_entry)
2546-
else:
2547-
# Update existing
2548-
for i, h in enumerate(config["hooks"][hook_name]):
2549-
if h.get("extension") == manifest.id:
2550-
config["hooks"][hook_name][i] = hook_entry
2551-
2552-
self.save_project_config(config)
2692+
if changed:
2693+
self.save_project_config(config)
25532694

25542695
def unregister_hooks(self, extension_id: str):
25552696
"""Remove extension hooks from project config.
25562697
25572698
Args:
25582699
extension_id: ID of extension to unregister
25592700
"""
2701+
# Always remove from installed list (Feedback from review)
2702+
self.unregister_extension(extension_id)
2703+
25602704
config = self.get_project_config()
25612705

2562-
if "hooks" not in config:
2706+
if not isinstance(config, dict):
2707+
config = {}
2708+
# We don't save yet, as there are no hooks to unregister,
2709+
# but unregister_extension above might have already saved a normalized config.
2710+
return
2711+
2712+
if "hooks" not in config or not isinstance(config["hooks"], dict):
25632713
return
25642714

25652715
# Remove hooks for this extension
2566-
for hook_name in config["hooks"]:
2716+
for hook_name in list(config["hooks"].keys()):
2717+
hook_list = config["hooks"][hook_name]
2718+
if not isinstance(hook_list, list):
2719+
config["hooks"][hook_name] = []
2720+
continue
25672721
config["hooks"][hook_name] = [
25682722
h
2569-
for h in config["hooks"][hook_name]
2570-
if h.get("extension") != extension_id
2723+
for h in hook_list
2724+
if isinstance(h, dict) and h.get("extension") != extension_id
25712725
]
25722726

25732727
# Clean up empty hook arrays

0 commit comments

Comments
 (0)