From 2db36c240c988689d12cb62eb831bd8771731f62 Mon Sep 17 00:00:00 2001 From: Joshua Li Date: Fri, 17 Apr 2026 16:28:30 -0700 Subject: [PATCH 1/4] default 180, make timeout configurable --- README.md | 1 + devservices/commands/up.py | 4 +++- devservices/configs/service_config.py | 5 +++++ devservices/constants.py | 4 ++-- devservices/utils/docker.py | 19 ++++++++++++++----- 5 files changed, 25 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 4ef98c5..922165c 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,7 @@ The configuration file is a yaml file that looks like this: # - local: A dependency that is defined in the config file. These dependencies do not have a remote field and must correspond to either a service defined in the 'services' section or a program defined in the 'x-programs' section. # - remote: A dependency that is defined in the devservices directory in a remote repository. These configs are automatically fetched from the remote repository and installed. Any dependency with a remote field will be treated as a remote dependency. Example: https://github.com/getsentry/snuba/blob/59a5258ccbb502827ebc1d3b1bf80c607a3301bf/devservices/config.yml#L8 # - modes: A list of modes for the service. Each mode includes a list of dependencies that are used in that mode. +# - healthcheck_timeout: Optional. The number of seconds to wait for all containers to become healthy before failing. Defaults to 180. x-sentry-service-config: version: 0.1 service_name: example-service diff --git a/devservices/commands/up.py b/devservices/commands/up.py index 90864e9..8143ea9 100644 --- a/devservices/commands/up.py +++ b/devservices/commands/up.py @@ -390,7 +390,9 @@ def bring_up_docker_compose_services( ) exit(1) try: - check_all_containers_healthy(status, containers_to_check) + check_all_containers_healthy( + status, containers_to_check, timeout=service.config.healthcheck_timeout + ) except ContainerHealthcheckFailedError as e: status.failure(str(e)) exit(1) diff --git a/devservices/configs/service_config.py b/devservices/configs/service_config.py index 5c69e45..8bdaf33 100644 --- a/devservices/configs/service_config.py +++ b/devservices/configs/service_config.py @@ -9,6 +9,7 @@ from devservices.constants import CONFIG_FILE_NAME from devservices.constants import DEVSERVICES_DIR_NAME +from devservices.constants import HEALTHCHECK_TIMEOUT from devservices.constants import DependencyType from devservices.exceptions import ConfigNotFoundError from devservices.exceptions import ConfigParseError @@ -40,6 +41,7 @@ class ServiceConfig: service_name: str dependencies: dict[str, Dependency] modes: dict[str, list[str]] + healthcheck_timeout: int = HEALTHCHECK_TIMEOUT def __post_init__(self) -> None: self._validate() @@ -142,6 +144,9 @@ def load_service_config_from_file( service_name=service_config_data.get("service_name"), dependencies=dependencies, modes=service_config_data.get("modes", {}), + healthcheck_timeout=service_config_data.get( + "healthcheck_timeout", HEALTHCHECK_TIMEOUT + ), ) return service_config diff --git a/devservices/constants.py b/devservices/constants.py index 5b893c4..b578329 100644 --- a/devservices/constants.py +++ b/devservices/constants.py @@ -65,7 +65,7 @@ class DependencyType(StrEnum): DEVSERVICES_CACHE_DIR, "latest_version.txt" ) DEVSERVICES_LATEST_VERSION_CACHE_TTL = timedelta(minutes=15) -# Healthcheck timeout set to 2 minutes to account for slow healthchecks -HEALTHCHECK_TIMEOUT = 120 +# Healthcheck timeout set to 3 minutes to account for slow healthchecks +HEALTHCHECK_TIMEOUT = 180 HEALTHCHECK_INTERVAL = 5 SUPERVISOR_TIMEOUT = 10 diff --git a/devservices/utils/docker.py b/devservices/utils/docker.py index 6992dfd..7eb08c4 100644 --- a/devservices/utils/docker.py +++ b/devservices/utils/docker.py @@ -43,25 +43,34 @@ def check_docker_daemon_running() -> None: def check_all_containers_healthy( - status: Status, containers: list[ContainerNames] + status: Status, + containers: list[ContainerNames], + timeout: int = HEALTHCHECK_TIMEOUT, ) -> None: """Ensures all containers are healthy.""" status.info("Waiting for all containers to be healthy") with concurrent.futures.ThreadPoolExecutor() as healthcheck_executor: futures = [ - healthcheck_executor.submit(wait_for_healthy, container, status) + healthcheck_executor.submit(wait_for_healthy, container, status, timeout) for container in containers ] for future in concurrent.futures.as_completed(futures): future.result() -def wait_for_healthy(container: ContainerNames, status: Status) -> None: +def wait_for_healthy( + container: ContainerNames, + status: Status, + timeout: int = HEALTHCHECK_TIMEOUT, +) -> None: """ Polls a Docker container's health status until it becomes healthy or a timeout is reached. """ + status.info( + f"Waiting for {container.short_name} to be healthy (timeout: {timeout}s)" + ) start = time.time() - while time.time() - start < HEALTHCHECK_TIMEOUT: + while time.time() - start < timeout: # Run docker inspect to get the container's health status try: # For containers with no healthchecks, the output will be "unknown" @@ -96,7 +105,7 @@ def wait_for_healthy(container: ContainerNames, status: Status) -> None: # If not healthy, wait and try again time.sleep(HEALTHCHECK_INTERVAL) - raise ContainerHealthcheckFailedError(container.short_name, HEALTHCHECK_TIMEOUT) + raise ContainerHealthcheckFailedError(container.short_name, timeout) @dataclass From a89aff214d1d31565a2ba6c1e7b1467b05a68d95 Mon Sep 17 00:00:00 2001 From: Joshua Li Date: Fri, 17 Apr 2026 16:30:18 -0700 Subject: [PATCH 2/4] fix tests --- tests/commands/test_up.py | 3 ++- tests/configs/test_service_config.py | 2 ++ tests/utils/test_docker.py | 4 ++++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/commands/test_up.py b/tests/commands/test_up.py index 8c5f870..7c16bae 100644 --- a/tests/commands/test_up.py +++ b/tests/commands/test_up.py @@ -865,7 +865,7 @@ def test_up_docker_compose_container_healthcheck_failed( assert "Starting clickhouse" in captured.out.strip() assert "Starting redis" in captured.out.strip() assert ( - "Container container1 did not become healthy within 120 seconds." + "Container container1 did not become healthy within 180 seconds." in captured.out.strip() ) @@ -1376,6 +1376,7 @@ def test_up_multiple_modes_overlapping_running_service( mock_check_all_containers_healthy.assert_called_once_with( mock.ANY, ["container1", "container2"], + timeout=mock.ANY, ) captured = capsys.readouterr() diff --git a/tests/configs/test_service_config.py b/tests/configs/test_service_config.py index e25fc89..aed742f 100644 --- a/tests/configs/test_service_config.py +++ b/tests/configs/test_service_config.py @@ -111,6 +111,7 @@ def test_load_service_config_from_file( for key, value in dependencies.items() }, "modes": modes, + "healthcheck_timeout": 180, } @@ -131,6 +132,7 @@ def test_load_service_config_from_file_no_dependencies(tmp_path: Path) -> None: "service_name": "example-service", "dependencies": {}, "modes": {"default": []}, + "healthcheck_timeout": 180, } diff --git a/tests/utils/test_docker.py b/tests/utils/test_docker.py index 938a288..4c799cb 100644 --- a/tests/utils/test_docker.py +++ b/tests/utils/test_docker.py @@ -533,10 +533,12 @@ def test_check_all_containers_healthy_success( mock.call( ContainerNames(name="devservices-container1", short_name="container1"), mock_status, + 180, ), mock.call( ContainerNames(name="devservices-container2", short_name="container2"), mock_status, + 180, ), ] ) @@ -671,10 +673,12 @@ def test_check_all_containers_healthy_failure( mock.call( ContainerNames(name="devservices-container1", short_name="container1"), mock_status, + 180, ), mock.call( ContainerNames(name="devservices-container2", short_name="container2"), mock_status, + 180, ), ] ) From eb664a559a21159c62f4aa8a1ff538a4f1b4d8f0 Mon Sep 17 00:00:00 2001 From: Joshua Li Date: Fri, 17 Apr 2026 16:41:08 -0700 Subject: [PATCH 3/4] feedback --- devservices/configs/service_config.py | 14 +++++++--- tests/configs/test_service_config.py | 37 +++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/devservices/configs/service_config.py b/devservices/configs/service_config.py index 8bdaf33..d739e4d 100644 --- a/devservices/configs/service_config.py +++ b/devservices/configs/service_config.py @@ -61,6 +61,14 @@ def _validate(self) -> None: if "default" not in self.modes: raise ConfigValidationError("Default mode is required in service config") + if ( + not isinstance(self.healthcheck_timeout, int) + or self.healthcheck_timeout <= 0 + ): + raise ConfigValidationError( + "healthcheck_timeout must be a positive integer" + ) + for mode, services in self.modes.items(): if not isinstance(services, list): raise ConfigValidationError(f"Services in mode '{mode}' must be a list") @@ -144,9 +152,9 @@ def load_service_config_from_file( service_name=service_config_data.get("service_name"), dependencies=dependencies, modes=service_config_data.get("modes", {}), - healthcheck_timeout=service_config_data.get( - "healthcheck_timeout", HEALTHCHECK_TIMEOUT - ), + healthcheck_timeout=service_config_data.get("healthcheck_timeout") + if service_config_data.get("healthcheck_timeout") is not None + else HEALTHCHECK_TIMEOUT, ) return service_config diff --git a/tests/configs/test_service_config.py b/tests/configs/test_service_config.py index aed742f..ae1381f 100644 --- a/tests/configs/test_service_config.py +++ b/tests/configs/test_service_config.py @@ -136,6 +136,43 @@ def test_load_service_config_from_file_no_dependencies(tmp_path: Path) -> None: } +def test_load_service_config_from_file_null_healthcheck_timeout( + tmp_path: Path, +) -> None: + config = { + "x-sentry-service-config": { + "version": 0.1, + "service_name": "example-service", + "modes": {"default": []}, + "healthcheck_timeout": None, + }, + "services": {}, + } + create_config_file(tmp_path, config) + + service_config = load_service_config_from_file(str(tmp_path)) + assert service_config.healthcheck_timeout == 180 + + +def test_load_service_config_from_file_invalid_healthcheck_timeout( + tmp_path: Path, +) -> None: + config = { + "x-sentry-service-config": { + "version": 0.1, + "service_name": "example-service", + "modes": {"default": []}, + "healthcheck_timeout": -1, + }, + "services": {}, + } + create_config_file(tmp_path, config) + + with pytest.raises(ConfigValidationError) as e: + load_service_config_from_file(str(tmp_path)) + assert str(e.value) == "healthcheck_timeout must be a positive integer" + + def test_load_service_config_from_file_missing_config(tmp_path: Path) -> None: with pytest.raises(ConfigNotFoundError) as e: load_service_config_from_file(str(tmp_path)) From 264d8c4a33ee7b07aafd351eb02c58d1ba8dfb26 Mon Sep 17 00:00:00 2001 From: Joshua Li Date: Fri, 17 Apr 2026 16:54:13 -0700 Subject: [PATCH 4/4] better validation --- devservices/configs/service_config.py | 2 +- tests/configs/test_service_config.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/devservices/configs/service_config.py b/devservices/configs/service_config.py index d739e4d..80847cc 100644 --- a/devservices/configs/service_config.py +++ b/devservices/configs/service_config.py @@ -61,7 +61,7 @@ def _validate(self) -> None: if "default" not in self.modes: raise ConfigValidationError("Default mode is required in service config") - if ( + if isinstance(self.healthcheck_timeout, bool) or ( not isinstance(self.healthcheck_timeout, int) or self.healthcheck_timeout <= 0 ): diff --git a/tests/configs/test_service_config.py b/tests/configs/test_service_config.py index ae1381f..7823627 100644 --- a/tests/configs/test_service_config.py +++ b/tests/configs/test_service_config.py @@ -154,15 +154,17 @@ def test_load_service_config_from_file_null_healthcheck_timeout( assert service_config.healthcheck_timeout == 180 +@pytest.mark.parametrize("invalid_timeout", [-1, 0, "120", True, False]) def test_load_service_config_from_file_invalid_healthcheck_timeout( tmp_path: Path, + invalid_timeout: object, ) -> None: config = { "x-sentry-service-config": { "version": 0.1, "service_name": "example-service", "modes": {"default": []}, - "healthcheck_timeout": -1, + "healthcheck_timeout": invalid_timeout, }, "services": {}, }