Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ The configuration file is a yaml file that looks like this:
# - dependencies: A list of dependencies for the service. Each dependency is a yaml block that holds the dependency configuration. There are two types of dependencies:
# - local: A dependency that is defined in the config file. These dependencies do not have a remote field and must correspond to either a service defined in the 'services' section or a program defined in the 'x-programs' section.
# - remote: A dependency that is defined in the devservices directory in a remote repository. These configs are automatically fetched from the remote repository and installed. Any dependency with a remote field will be treated as a remote dependency. Example: https://github.com/getsentry/snuba/blob/59a5258ccbb502827ebc1d3b1bf80c607a3301bf/devservices/config.yml#L8
# Each dependency also supports the following optional fields:
# - healthcheck_timeout: The number of seconds to wait for this dependency's container(s) to become healthy. Defaults to 180.
# - modes: A list of modes for the service. Each mode includes a list of dependencies that are used in that mode.
x-sentry-service-config:
version: 0.1
Expand Down
7 changes: 7 additions & 0 deletions TODO.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# TODO

## Per-dependency healthcheck_timeout for remote sub-services

`healthcheck_timeout` set on a dependency only applies to that dependency's containers when it is a direct local compose service. For remote dependencies (e.g. `snuba`), the timeout is applied to all of snuba's sub-containers as a group using the calling service's fallback (`HEALTHCHECK_TIMEOUT`). There is currently no way to configure a custom timeout for an individual sub-container (e.g. `kafka` inside `snuba`) from the calling service's `config.yml`.

To fix this properly, the timeout would need to be threaded through the remote dependency resolution path so that a service like snuba can declare its own per-sub-service timeouts in its own `config.yml` and have those respected when snuba is brought up as a dependency of another service.
13 changes: 11 additions & 2 deletions devservices/commands/up.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from devservices.constants import DEVSERVICES_DEPENDENCIES_CACHE_DIR
from devservices.constants import DEVSERVICES_DEPENDENCIES_CACHE_DIR_KEY
from devservices.constants import DEVSERVICES_DIR_NAME
from devservices.constants import HEALTHCHECK_TIMEOUT
from devservices.constants import DependencyType
from devservices.exceptions import ConfigError
from devservices.exceptions import ConfigNotFoundError
Expand All @@ -33,6 +34,7 @@
from devservices.utils.dependencies import InstalledRemoteDependency
from devservices.utils.dependencies import construct_dependency_graph
from devservices.utils.dependencies import install_and_verify_dependencies
from devservices.utils.docker import ContainerHealthcheckConfig
from devservices.utils.docker import check_all_containers_healthy
from devservices.utils.docker_compose import DockerComposeCommand
from devservices.utils.docker_compose import get_container_names_for_project
Expand Down Expand Up @@ -367,7 +369,7 @@ def bring_up_docker_compose_services(
mode_dependencies=mode_dependencies,
)

containers_to_check = []
containers_to_check: list[ContainerHealthcheckConfig] = []
with concurrent.futures.ThreadPoolExecutor() as up_dependency_executor:
futures = [
up_dependency_executor.submit(
Expand All @@ -383,7 +385,14 @@ def bring_up_docker_compose_services(
container_names = get_container_names_for_project(
cmd.project_name, cmd.config_path, cmd.services
)
containers_to_check.extend(container_names)
for container in container_names:
dep = service.config.dependencies.get(container.short_name)
timeout = dep.healthcheck_timeout if dep else HEALTHCHECK_TIMEOUT
containers_to_check.append(
ContainerHealthcheckConfig(
container.name, container.short_name, timeout
)
)
except DockerComposeError as dce:
status.failure(
f"Failed to get containers to healthcheck for {cmd.project_name}: {dce.stderr}"
Expand Down
5 changes: 5 additions & 0 deletions devservices/configs/service_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from devservices.constants import CONFIG_FILE_NAME
from devservices.constants import DEVSERVICES_DIR_NAME
from devservices.constants import HEALTHCHECK_TIMEOUT
from devservices.constants import DependencyType
from devservices.exceptions import ConfigNotFoundError
from devservices.exceptions import ConfigParseError
Expand All @@ -32,6 +33,7 @@ class Dependency:
description: str
dependency_type: DependencyType
remote: RemoteConfig | None = None
healthcheck_timeout: int = HEALTHCHECK_TIMEOUT


@dataclass
Expand Down Expand Up @@ -131,6 +133,9 @@ def load_service_config_from_file(
else None
),
dependency_type=dependency_type,
healthcheck_timeout=value.get(
"healthcheck_timeout", HEALTHCHECK_TIMEOUT
),
)
except TypeError as type_error:
raise ConfigParseError(
Expand Down
4 changes: 2 additions & 2 deletions devservices/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class DependencyType(StrEnum):
DEVSERVICES_CACHE_DIR, "latest_version.txt"
)
DEVSERVICES_LATEST_VERSION_CACHE_TTL = timedelta(minutes=15)
# Healthcheck timeout set to 2 minutes to account for slow healthchecks
HEALTHCHECK_TIMEOUT = 120
# Healthcheck timeout set to 3 minutes to account for slow healthchecks
HEALTHCHECK_TIMEOUT = 180
HEALTHCHECK_INTERVAL = 5
SUPERVISOR_TIMEOUT = 10
24 changes: 17 additions & 7 deletions devservices/utils/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@
from devservices.utils.console import Status


class ContainerNames(NamedTuple):
class ContainerHealthcheckConfig(NamedTuple):
name: str
short_name: str
healthcheck_timeout: int = HEALTHCHECK_TIMEOUT


def check_docker_daemon_running() -> None:
Expand All @@ -43,25 +44,34 @@ def check_docker_daemon_running() -> None:


def check_all_containers_healthy(
status: Status, containers: list[ContainerNames]
status: Status,
containers: list[ContainerHealthcheckConfig],
) -> None:
"""Ensures all containers are healthy."""
status.info("Waiting for all containers to be healthy")
with concurrent.futures.ThreadPoolExecutor() as healthcheck_executor:
futures = [
healthcheck_executor.submit(wait_for_healthy, container, status)
healthcheck_executor.submit(
wait_for_healthy, container, status, container.healthcheck_timeout
)
for container in containers
]
for future in concurrent.futures.as_completed(futures):
future.result()


def wait_for_healthy(container: ContainerNames, status: Status) -> None:
def wait_for_healthy(
container: ContainerHealthcheckConfig,
status: Status,
timeout: int = HEALTHCHECK_TIMEOUT,
) -> None:
"""
Polls a Docker container's health status until it becomes healthy or a timeout is reached.
"""
status.info(
f"Waiting for {container.short_name} to be healthy (timeout: {timeout}s)"
)
start = time.time()
while time.time() - start < HEALTHCHECK_TIMEOUT:
while time.time() - start < timeout:
# Run docker inspect to get the container's health status
try:
# For containers with no healthchecks, the output will be "unknown"
Expand Down Expand Up @@ -96,7 +106,7 @@ def wait_for_healthy(container: ContainerNames, status: Status) -> None:
# If not healthy, wait and try again
time.sleep(HEALTHCHECK_INTERVAL)

raise ContainerHealthcheckFailedError(container.short_name, HEALTHCHECK_TIMEOUT)
raise ContainerHealthcheckFailedError(container.short_name, timeout)


@dataclass
Expand Down
8 changes: 5 additions & 3 deletions devservices/utils/docker_compose.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from devservices.exceptions import DockerComposeInstallationError
from devservices.utils.console import Console
from devservices.utils.dependencies import InstalledRemoteDependency
from devservices.utils.docker import ContainerNames
from devservices.utils.docker import ContainerHealthcheckConfig
from devservices.utils.docker import check_docker_daemon_running
from devservices.utils.install_binary import install_binary
from devservices.utils.services import Service
Expand Down Expand Up @@ -99,7 +99,7 @@ def install_docker_compose() -> None:

def get_container_names_for_project(
project_name: str, config_path: str, services: list[str]
) -> list[ContainerNames]:
) -> list[ContainerHealthcheckConfig]:
try:
output = subprocess.check_output(
[
Expand All @@ -119,7 +119,9 @@ def get_container_names_for_project(
text=True,
).splitlines()
return [
ContainerNames(name=json_data["name"], short_name=json_data["short_name"])
ContainerHealthcheckConfig(
name=json_data["name"], short_name=json_data["short_name"]
)
for line in output
if (json_data := json.loads(line))
]
Expand Down
12 changes: 6 additions & 6 deletions tests/commands/test_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,10 +753,10 @@ def test_print_health_logs_all_healthy(
mock_get_container_health: mock.Mock,
capsys: pytest.CaptureFixture[str],
) -> None:
from devservices.utils.docker import ContainerNames
from devservices.utils.docker import ContainerHealthcheckConfig

mock_get_container_names.return_value = [
ContainerNames(name="proj-redis-1", short_name="redis")
ContainerHealthcheckConfig(name="proj-redis-1", short_name="redis")
]
mock_get_container_health.return_value = [
ContainerHealth(name="proj-redis-1", status="healthy", log=[])
Expand All @@ -775,10 +775,10 @@ def test_print_health_logs_unhealthy_with_log(
mock_get_container_health: mock.Mock,
capsys: pytest.CaptureFixture[str],
) -> None:
from devservices.utils.docker import ContainerNames
from devservices.utils.docker import ContainerHealthcheckConfig

mock_get_container_names.return_value = [
ContainerNames(name="proj-redis-1", short_name="redis")
ContainerHealthcheckConfig(name="proj-redis-1", short_name="redis")
]
mock_get_container_health.return_value = [
ContainerHealth(
Expand All @@ -803,10 +803,10 @@ def test_print_health_logs_no_healthcheck(
mock_get_container_health: mock.Mock,
capsys: pytest.CaptureFixture[str],
) -> None:
from devservices.utils.docker import ContainerNames
from devservices.utils.docker import ContainerHealthcheckConfig

mock_get_container_names.return_value = [
ContainerNames(name="proj-redis-1", short_name="redis")
ContainerHealthcheckConfig(name="proj-redis-1", short_name="redis")
]
mock_get_container_health.return_value = [
ContainerHealth(name="proj-redis-1", status="none", log=[])
Expand Down
Loading
Loading