From b82d2eb5aede766fe8287d5619cd204cfc55e9ad Mon Sep 17 00:00:00 2001 From: ethanmillerinvestments-code <224840572+ethanmillerinvestments-code@users.noreply.github.com> Date: Tue, 12 May 2026 14:55:14 -0400 Subject: [PATCH] fix: speed up existing devcontainer lookup --- helpers/devcontainer_lookup.py | 51 +++++++++++++ main.py | 35 +++++---- tests/test_devcontainer_lookup.py | 118 ++++++++++++++++++++++++++++++ 3 files changed, 188 insertions(+), 16 deletions(-) create mode 100644 helpers/devcontainer_lookup.py create mode 100644 tests/test_devcontainer_lookup.py diff --git a/helpers/devcontainer_lookup.py b/helpers/devcontainer_lookup.py new file mode 100644 index 0000000..978f944 --- /dev/null +++ b/helpers/devcontainer_lookup.py @@ -0,0 +1,51 @@ +from dataclasses import dataclass +from typing import Any, Callable, Mapping, Optional, Tuple + + +@dataclass(frozen=True) +class DevcontainerLookupResult: + devcontainer_json: str + generated: bool + source: str + url: Optional[str] + repo_context: Optional[str] = None + devcontainer_url: Optional[str] = None + should_save: bool = False + + +def resolve_devcontainer_lookup( + repo_url: str, + regenerate: bool, + instructor_client: Any, + check_url_exists: Callable[[str], Tuple[bool, Optional[Mapping[str, Any]]]], + fetch_repo_context: Callable[[str], Tuple[str, Optional[str], Optional[str]]], + generate_devcontainer_json: Callable[..., Tuple[str, Optional[str]]], +) -> DevcontainerLookupResult: + exists, existing_record = check_url_exists(repo_url) + + if exists and existing_record and not regenerate: + return DevcontainerLookupResult( + devcontainer_json=existing_record["devcontainer_json"], + generated=existing_record["generated"], + source="database", + url=existing_record["devcontainer_url"], + ) + + repo_context, _existing_devcontainer, devcontainer_url = fetch_repo_context(repo_url) + devcontainer_json, url = generate_devcontainer_json( + instructor_client, + repo_url, + repo_context, + devcontainer_url, + regenerate=regenerate, + ) + + return DevcontainerLookupResult( + devcontainer_json=devcontainer_json, + generated=True, + source="generated" if url is None else "repository", + url=url, + repo_context=repo_context, + devcontainer_url=devcontainer_url, + should_save=True, + ) diff --git a/main.py b/main.py index 2e6410d..ad17368 100644 --- a/main.py +++ b/main.py @@ -8,6 +8,7 @@ from helpers.openai_helpers import setup_azure_openai, setup_instructor from helpers.github_helpers import fetch_repo_context, check_url_exists +from helpers.devcontainer_lookup import resolve_devcontainer_lookup from helpers.devcontainer_helpers import generate_devcontainer_json, validate_devcontainer_json from helpers.token_helpers import count_tokens, truncate_to_token_limit from models import DevContainer @@ -109,26 +110,28 @@ async def post(repo_url: str, regenerate: bool = False): repo_url = repo_url.rstrip('/') try: - exists, existing_record = check_url_exists(repo_url) - logging.info(f"URL check result: exists={exists}, existing_record={existing_record}") - - repo_context, existing_devcontainer, devcontainer_url = fetch_repo_context(repo_url) - logging.info(f"Fetched repo context. Existing devcontainer: {'Yes' if existing_devcontainer else 'No'}") - logging.info(f"Devcontainer URL: {devcontainer_url}") + lookup_result = resolve_devcontainer_lookup( + repo_url=repo_url, + regenerate=regenerate, + instructor_client=globals().get("instructor_client"), + check_url_exists=check_url_exists, + fetch_repo_context=fetch_repo_context, + generate_devcontainer_json=generate_devcontainer_json, + ) - if exists and not regenerate: + if lookup_result.source == "database": logging.info(f"URL already exists in database. Returning existing devcontainer_json for: {repo_url}") - devcontainer_json = existing_record['devcontainer_json'] - generated = existing_record['generated'] - source = "database" - url = existing_record['devcontainer_url'] else: - devcontainer_json, url = generate_devcontainer_json(instructor_client, repo_url, repo_context, devcontainer_url, regenerate=regenerate) - generated = True - source = "generated" if url is None else "repository" + logging.info(f"Fetched repo context. Devcontainer URL: {lookup_result.devcontainer_url}") + devcontainer_json = lookup_result.devcontainer_json + generated = lookup_result.generated + source = lookup_result.source + url = lookup_result.url + repo_context = lookup_result.repo_context + devcontainer_url = lookup_result.devcontainer_url - if not exists or regenerate: + if lookup_result.should_save: logging.info("Saving to database...") try: if hasattr(openai_client.embeddings, "create"): @@ -207,4 +210,4 @@ async def get(fname:str, ext:str): if __name__ == "__main__": logging.info("Starting FastHTML app...") - serve() \ No newline at end of file + serve() diff --git a/tests/test_devcontainer_lookup.py b/tests/test_devcontainer_lookup.py new file mode 100644 index 0000000..0f2eef5 --- /dev/null +++ b/tests/test_devcontainer_lookup.py @@ -0,0 +1,118 @@ +import unittest + +from helpers.devcontainer_lookup import resolve_devcontainer_lookup + + +class ResolveDevcontainerLookupTests(unittest.TestCase): + def test_cached_database_record_skips_repo_fetch_and_generation(self): + calls = {"lookup": 0, "fetch": 0, "generate": 0} + record = { + "devcontainer_json": '{"name": "cached"}', + "generated": True, + "devcontainer_url": "https://example.com/devcontainer.json", + } + + def check_url_exists(url): + calls["lookup"] += 1 + return True, record + + def fetch_repo_context(url): + calls["fetch"] += 1 + raise AssertionError("fetch_repo_context should not run for cache hits") + + def generate_devcontainer_json(*args, **kwargs): + calls["generate"] += 1 + raise AssertionError("generate_devcontainer_json should not run for cache hits") + + result = resolve_devcontainer_lookup( + "https://github.com/example/project", + regenerate=False, + instructor_client=object(), + check_url_exists=check_url_exists, + fetch_repo_context=fetch_repo_context, + generate_devcontainer_json=generate_devcontainer_json, + ) + + self.assertEqual(result.devcontainer_json, record["devcontainer_json"]) + self.assertTrue(result.generated) + self.assertEqual(result.source, "database") + self.assertEqual(result.url, record["devcontainer_url"]) + self.assertFalse(result.should_save) + self.assertEqual(calls, {"lookup": 1, "fetch": 0, "generate": 0}) + + def test_missing_record_fetches_and_generates_devcontainer(self): + calls = {"lookup": 0, "fetch": 0, "generate": 0} + + def check_url_exists(url): + calls["lookup"] += 1 + return False, None + + def fetch_repo_context(url): + calls["fetch"] += 1 + return "repo context", None, None + + def generate_devcontainer_json( + client, url, repo_context, devcontainer_url, regenerate=False + ): + calls["generate"] += 1 + return '{"name": "generated"}', None + + result = resolve_devcontainer_lookup( + "https://github.com/example/project", + regenerate=False, + instructor_client=object(), + check_url_exists=check_url_exists, + fetch_repo_context=fetch_repo_context, + generate_devcontainer_json=generate_devcontainer_json, + ) + + self.assertEqual(result.devcontainer_json, '{"name": "generated"}') + self.assertEqual(result.source, "generated") + self.assertTrue(result.generated) + self.assertTrue(result.should_save) + self.assertEqual(result.repo_context, "repo context") + self.assertIsNone(result.devcontainer_url) + self.assertEqual(calls, {"lookup": 1, "fetch": 1, "generate": 1}) + + def test_regenerate_ignores_cached_record_and_refreshes_context(self): + calls = {"lookup": 0, "fetch": 0, "generate": 0} + record = { + "devcontainer_json": '{"name": "cached"}', + "generated": True, + "devcontainer_url": "https://example.com/devcontainer.json", + } + + def check_url_exists(url): + calls["lookup"] += 1 + return True, record + + def fetch_repo_context(url): + calls["fetch"] += 1 + return "repo context", None, "https://example.com/source-devcontainer.json" + + def generate_devcontainer_json( + client, url, repo_context, devcontainer_url, regenerate=False + ): + calls["generate"] += 1 + self.assertTrue(regenerate) + return '{"name": "regenerated"}', None + + result = resolve_devcontainer_lookup( + "https://github.com/example/project", + regenerate=True, + instructor_client=object(), + check_url_exists=check_url_exists, + fetch_repo_context=fetch_repo_context, + generate_devcontainer_json=generate_devcontainer_json, + ) + + self.assertEqual(result.devcontainer_json, '{"name": "regenerated"}') + self.assertEqual(result.source, "generated") + self.assertTrue(result.generated) + self.assertTrue(result.should_save) + self.assertEqual(result.devcontainer_url, "https://example.com/source-devcontainer.json") + self.assertEqual(calls, {"lookup": 1, "fetch": 1, "generate": 1}) + + +if __name__ == "__main__": + unittest.main()