From 2cbc652d61f535d8235a76d0765b70edd8be476d Mon Sep 17 00:00:00 2001 From: lil-aditya Date: Thu, 2 Apr 2026 10:51:25 +0530 Subject: [PATCH 1/2] chore: add clean-base verification and smoke harness --- .github/workflows/smoke.yml | 17 +++ CONTRIBUTING.md | 6 + docs/dev/upstream_sync_playbook.md | 65 +++++++++ scripts/verify_clean_base.py | 215 +++++++++++++++++++++++++++++ tests_smoke/test_smoke_app.py | 56 ++++++++ 5 files changed, 359 insertions(+) create mode 100644 .github/workflows/smoke.yml create mode 100644 docs/dev/upstream_sync_playbook.md create mode 100644 scripts/verify_clean_base.py create mode 100644 tests_smoke/test_smoke_app.py diff --git a/.github/workflows/smoke.yml b/.github/workflows/smoke.yml new file mode 100644 index 00000000..1c728172 --- /dev/null +++ b/.github/workflows/smoke.yml @@ -0,0 +1,17 @@ +name: smoke + +on: + pull_request: + +jobs: + smoke: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Install runtime dependencies + run: python -m pip install -r requirements.txt + - name: Run clean-base verification + run: python scripts/verify_clean_base.py diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 498e472c..de8edc0c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -73,6 +73,12 @@ The `pr-intake` workflow is advisory. If required structure is missing, it may a 5. Update docs for any setup, architecture, or workflow change 6. Open a PR into `EAPD-DRB/MUIOGO:main` using the repository PR template +## Clean-base verification + +Run `python scripts/verify_clean_base.py` before upstream sync merges and before requesting review on guardrail work. It checks unresolved git operation state, conflict markers, Python compilation, and the stdlib smoke harness. + +See `docs/dev/upstream_sync_playbook.md` for the v5.5 sync order, overlap inventory, and rejected upstream patterns. + ## Required branching rule Every implementation contribution must use: diff --git a/docs/dev/upstream_sync_playbook.md b/docs/dev/upstream_sync_playbook.md new file mode 100644 index 00000000..db4ecb87 --- /dev/null +++ b/docs/dev/upstream_sync_playbook.md @@ -0,0 +1,65 @@ +# Upstream Sync Playbook + +This repository tracks upstream MUIO releases. For sync work such as the v5.5 stack in [#388](https://github.com/EAPD-DRB/MUIOGO/issues/388), follow upstream by default and only diverge when MUIOGO needs portability, security, runtime reliability, or downstream integration fixes. + +## Sync Order + +Land the v5.5 stack in this order: + +1. [#389](https://github.com/EAPD-DRB/MUIOGO/issues/389): guardrails and smoke harness. This PR is the gate. +2. Backend and runtime safe changes from the remaining [#388](https://github.com/EAPD-DRB/MUIOGO/issues/388) child work. +3. Diagnostics UI follow-up work. +4. Result metadata bundle follow-up work, including `Variables.json` and rendering alignment. + +Use [#390](https://github.com/EAPD-DRB/MUIOGO/issues/390) as the overlap inventory before resolving conflicts or choosing a deliberate downstream divergence. + +## Protected Overlap Surface + +Review these files carefully in every upstream sync PR because they are the highest-overlap backend/runtime touch points: + +- `API/Classes/Base/Config.py` +- `API/Classes/Base/FileClass.py` +- `API/Classes/Case/DataFileClass.py` +- `API/Classes/Case/OsemosysClass.py` +- `API/Routes/DataFile/DataFileRoute.py` +- `API/app.py` + +If a PR touches any of them, state whether the change follows upstream as-is or intentionally diverges and why. + +## Rejected Upstream Patterns + +Do not land these patterns without an explicit maintainer decision: + +- writing logs under `WebAPP/`, including `WebAPP/app.log` +- deleting logs on startup +- current-working-directory-relative paths for important I/O +- `shell=True` subprocess usage +- compact JSON as the default write format +- `FileClassCompressed.py` +- similar compression or logging rewrites that reduce portability or make runtime behavior harder to reason about + +## Clean-Base Verification + +Before starting an upstream merge, after resolving conflicts, and before requesting review, run: + +```bash +python scripts/verify_clean_base.py +``` + +This command checks: + +- unresolved git operation state from `.git` control files such as `MERGE_HEAD` and rebase markers +- conflict markers in common tracked text files +- Python source compilation without assuming the repo root is writable +- stdlib `unittest` smoke coverage for app import and fixed routes + +Fix every reported failure before continuing with the sync. + +## Validation Scope For PR 1 + +For [#389](https://github.com/EAPD-DRB/MUIOGO/issues/389), keep validation intentionally small: + +- run `python scripts/verify_clean_base.py` +- confirm the smoke harness passes on a supported Python interpreter + +The CBC demo and broader backend/runtime validation belong to later PRs in the [#388](https://github.com/EAPD-DRB/MUIOGO/issues/388) stack, not this guardrail PR. diff --git a/scripts/verify_clean_base.py b/scripts/verify_clean_base.py new file mode 100644 index 00000000..0c3636db --- /dev/null +++ b/scripts/verify_clean_base.py @@ -0,0 +1,215 @@ +from __future__ import annotations + +import os +from pathlib import Path +import py_compile +import subprocess +import sys +import tempfile +from typing import Iterable + + +REPO_ROOT = Path(__file__).resolve().parents[1] +TEXT_EXTENSIONS = { + ".css", + ".html", + ".js", + ".json", + ".md", + ".py", + ".txt", + ".yaml", + ".yml", +} +SKIP_DIR_NAMES = { + ".git", + ".mypy_cache", + ".pytest_cache", + ".venv", + "__pycache__", + "node_modules", + "venv", +} + + +def resolve_git_dir(root: Path) -> Path | None: + git_path = root / ".git" + if git_path.is_dir(): + return git_path + if not git_path.is_file(): + return None + + try: + first_line = git_path.read_text(encoding="utf-8").splitlines()[0].strip() + except (IndexError, OSError): + return None + + prefix = "gitdir:" + if not first_line.lower().startswith(prefix): + return None + + git_dir = first_line[len(prefix) :].strip() + candidate = Path(git_dir) + if not candidate.is_absolute(): + candidate = (root / candidate).resolve() + return candidate + + +def iter_repo_files(root: Path, suffixes: set[str]) -> Iterable[Path]: + git_list = subprocess.run( + [ + "git", + "-C", + str(root), + "ls-files", + "--cached", + "--others", + "--exclude-standard", + "-z", + ], + capture_output=True, + check=False, + ) + if git_list.returncode == 0: + for raw_path in git_list.stdout.split(b"\x00"): + if not raw_path: + continue + path = root / raw_path.decode("utf-8", "surrogateescape") + if path.suffix.lower() not in suffixes: + continue + if any(part in SKIP_DIR_NAMES for part in path.parts): + continue + yield path + return + + for path in root.rglob("*"): + if path.is_dir(): + continue + if path.suffix.lower() not in suffixes: + continue + if any(part in SKIP_DIR_NAMES for part in path.parts): + continue + yield path + + +def check_unresolved_git_state(root: Path) -> list[str]: + git_dir = resolve_git_dir(root) + if git_dir is None: + return ["Git metadata not found; cannot verify merge or rebase state."] + + issues: list[str] = [] + markers = ( + "MERGE_HEAD", + "CHERRY_PICK_HEAD", + "REVERT_HEAD", + "REBASE_HEAD", + ) + for marker in markers: + if (git_dir / marker).exists(): + issues.append(f"Git operation in progress: {git_dir / marker}") + + for rebase_dir in ("rebase-apply", "rebase-merge"): + if (git_dir / rebase_dir).exists(): + issues.append(f"Git rebase in progress: {git_dir / rebase_dir}") + + return issues + + +def check_conflict_markers(root: Path) -> list[str]: + hits: list[str] = [] + + for path in iter_repo_files(root, TEXT_EXTENSIONS): + try: + with path.open("r", encoding="utf-8", errors="ignore") as handle: + for line_number, line in enumerate(handle, start=1): + stripped = line.lstrip() + if stripped.startswith("<<<<<<<"): + hits.append(f"{path.relative_to(root)}:{line_number}") + break + except OSError as exc: + hits.append(f"{path.relative_to(root)}: unreadable ({exc})") + + return hits + + +def run_py_compile(root: Path) -> list[str]: + failures: list[str] = [] + python_files = list(iter_repo_files(root, {".py"})) + + for path in python_files: + file_descriptor, temp_path = tempfile.mkstemp( + prefix="verify-clean-base-", + suffix=".pyc", + ) + os.close(file_descriptor) + try: + py_compile.compile( + str(path), + cfile=temp_path, + doraise=True, + ) + except (OSError, py_compile.PyCompileError) as exc: + failures.append(f"{path.relative_to(root)}: {exc}") + finally: + try: + os.remove(temp_path) + except OSError: + pass + + return failures + + +def run_smoke_tests(root: Path) -> int: + command = [ + sys.executable, + "-m", + "unittest", + "discover", + "-s", + "tests_smoke", + "-p", + "test_*.py", + ] + process = subprocess.run(command, cwd=root) + return process.returncode + + +def main() -> int: + print(f"[verify] repo root: {REPO_ROOT}") + + git_state = check_unresolved_git_state(REPO_ROOT) + if git_state: + print("[FAIL] unresolved git state detected:") + for issue in git_state: + print(f" - {issue}") + return 1 + print("[OK] git state: clean") + + conflicts = check_conflict_markers(REPO_ROOT) + if conflicts: + print("[FAIL] conflict markers found:") + for conflict in conflicts: + print(f" - {conflict}") + return 1 + print("[OK] no conflict markers found") + + compile_failures = run_py_compile(REPO_ROOT) + if compile_failures: + print("[FAIL] python compile step failed:") + for failure in compile_failures: + print(f" - {failure}") + return 1 + print("[OK] python compile step passed") + + smoke_status = run_smoke_tests(REPO_ROOT) + if smoke_status != 0: + print(f"[FAIL] smoke tests failed (rc={smoke_status})") + return smoke_status + print("[OK] smoke tests passed") + + print("[SUCCESS] clean-base verification passed") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tests_smoke/test_smoke_app.py b/tests_smoke/test_smoke_app.py new file mode 100644 index 00000000..76c412ff --- /dev/null +++ b/tests_smoke/test_smoke_app.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +import importlib +import sys +from pathlib import Path +import unittest + + +REPO_ROOT = Path(__file__).resolve().parents[1] +API_DIR = REPO_ROOT / "API" + +if str(API_DIR) not in sys.path: + sys.path.insert(0, str(API_DIR)) + + +def load_app_module(): + return importlib.import_module("app") + + +class SmokeAppTest(unittest.TestCase): + def test_import_app(self) -> None: + app_module = load_app_module() + self.assertTrue(hasattr(app_module, "app")) + + def test_fixed_routes(self) -> None: + app_module = load_app_module() + client = app_module.app.test_client() + + response = client.get("/getSession") + self.assertEqual(response.status_code, 200) + self.assertTrue(response.is_json) + self.assertIn("session", response.get_json()) + + response = client.get("/") + self.assertEqual(response.status_code, 200) + + def test_set_session_clear(self) -> None: + app_module = load_app_module() + client = app_module.app.test_client() + + response = client.post("/setSession", json={"case": None}) + self.assertEqual(response.status_code, 200) + self.assertTrue(response.is_json) + self.assertEqual(response.get_json(), {"osycase": None}) + + def test_set_session_missing_case(self) -> None: + app_module = load_app_module() + client = app_module.app.test_client() + + response = client.post("/setSession", json={}) + self.assertEqual(response.status_code, 404) + self.assertTrue(response.is_json) + + +if __name__ == "__main__": + unittest.main() From 6dca60e7cab8cf52387db6a9c92a7afa5f142542 Mon Sep 17 00:00:00 2001 From: lil-aditya Date: Fri, 3 Apr 2026 01:27:06 +0530 Subject: [PATCH 2/2] fix(guardrails): avoid pycache writes; use repo-relative skip filtering --- scripts/verify_clean_base.py | 81 ++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 36 deletions(-) diff --git a/scripts/verify_clean_base.py b/scripts/verify_clean_base.py index 0c3636db..13e34fe6 100644 --- a/scripts/verify_clean_base.py +++ b/scripts/verify_clean_base.py @@ -77,7 +77,9 @@ def iter_repo_files(root: Path, suffixes: set[str]) -> Iterable[Path]: path = root / raw_path.decode("utf-8", "surrogateescape") if path.suffix.lower() not in suffixes: continue - if any(part in SKIP_DIR_NAMES for part in path.parts): + # Apply skip filtering to paths *within the repo*, not absolute paths. + rel_parts = path.relative_to(root).parts + if any(part in SKIP_DIR_NAMES for part in rel_parts): continue yield path return @@ -87,7 +89,8 @@ def iter_repo_files(root: Path, suffixes: set[str]) -> Iterable[Path]: continue if path.suffix.lower() not in suffixes: continue - if any(part in SKIP_DIR_NAMES for part in path.parts): + rel_parts = path.relative_to(root).parts + if any(part in SKIP_DIR_NAMES for part in rel_parts): continue yield path @@ -175,40 +178,46 @@ def run_smoke_tests(root: Path) -> int: def main() -> int: - print(f"[verify] repo root: {REPO_ROOT}") - - git_state = check_unresolved_git_state(REPO_ROOT) - if git_state: - print("[FAIL] unresolved git state detected:") - for issue in git_state: - print(f" - {issue}") - return 1 - print("[OK] git state: clean") - - conflicts = check_conflict_markers(REPO_ROOT) - if conflicts: - print("[FAIL] conflict markers found:") - for conflict in conflicts: - print(f" - {conflict}") - return 1 - print("[OK] no conflict markers found") - - compile_failures = run_py_compile(REPO_ROOT) - if compile_failures: - print("[FAIL] python compile step failed:") - for failure in compile_failures: - print(f" - {failure}") - return 1 - print("[OK] python compile step passed") - - smoke_status = run_smoke_tests(REPO_ROOT) - if smoke_status != 0: - print(f"[FAIL] smoke tests failed (rc={smoke_status})") - return smoke_status - print("[OK] smoke tests passed") - - print("[SUCCESS] clean-base verification passed") - return 0 + # Do not allow any verification step to emit __pycache__ into the repo. + old_dont_write = sys.dont_write_bytecode + sys.dont_write_bytecode = True + try: + print(f"[verify] repo root: {REPO_ROOT}") + + git_state = check_unresolved_git_state(REPO_ROOT) + if git_state: + print("[FAIL] unresolved git state detected:") + for issue in git_state: + print(f" - {issue}") + return 1 + print("[OK] git state: clean") + + conflicts = check_conflict_markers(REPO_ROOT) + if conflicts: + print("[FAIL] conflict markers found:") + for conflict in conflicts: + print(f" - {conflict}") + return 1 + print("[OK] no conflict markers found") + + compile_failures = run_py_compile(REPO_ROOT) + if compile_failures: + print("[FAIL] python compile step failed:") + for failure in compile_failures: + print(f" - {failure}") + return 1 + print("[OK] python compile step passed") + + smoke_status = run_smoke_tests(REPO_ROOT) + if smoke_status != 0: + print(f"[FAIL] smoke tests failed (rc={smoke_status})") + return smoke_status + print("[OK] smoke tests passed") + + print("[SUCCESS] clean-base verification passed") + return 0 + finally: + sys.dont_write_bytecode = old_dont_write if __name__ == "__main__":