Skip to content

Commit c2a2888

Browse files
committed
Add runtime observability logging for wave search and LLVM-MCA
1 parent fcef7a7 commit c2a2888

8 files changed

Lines changed: 560 additions & 14 deletions

File tree

vxsort/smallsort/codegen/README.md

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,37 @@ uv run python src/demo_super_vectorizer.py
4949
uv run python src/bitonic_compiler.py --depth-limit=3
5050
```
5151

52+
### Runtime Logging (Search/Discovery/LLVM-MCA Observability)
53+
54+
Use file-backed runtime logging when diagnosing "stuck" synthesis/scoring runs.
55+
56+
```bash
57+
uv run python src/bitonic_compiler.py \
58+
--vector-machine AVX2 --datatype i64 \
59+
--depth-limit 3 --gadget-depth 1 --top-k 5 \
60+
--target-cpu ZEN4 \
61+
--log-file /tmp/vxsort-wave.log \
62+
--log-level DEBUG \
63+
--log-format json
64+
```
65+
66+
Flags:
67+
- `--log-file PATH`: enable runtime logging and write to file
68+
- `--log-level {DEBUG,INFO,WARNING,ERROR}`: event verbosity
69+
- `--log-format {text,json}`: human-readable or JSONL
70+
- `--log-max-mb N`: optional rotating file size (MB, 0 disables)
71+
- `--log-backups N`: rotated file count to keep
72+
73+
Suggested triage commands:
74+
75+
```bash
76+
# Follow everything live
77+
tail -f /tmp/vxsort-wave.log
78+
79+
# Focus on heartbeat + discovery + llvm-mca scoring completion
80+
rg 'heartbeat|discovery_chunk|mca_scoring_completed|mca_run_completed' /tmp/vxsort-wave.log
81+
```
82+
5283
### uops Database Source Selection (JSON only)
5384

5485
Runtime loading uses compact JSON only:

vxsort/smallsort/codegen/src/bitonic_compiler.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
load_checkpoint,
3030
validate_checkpoint_config,
3131
)
32+
from .runtime_logging import RuntimeLoggingConfig, configure_runtime_logging
3233

3334
except ImportError:
3435
from cost_model import CostModel, get_supported_cpus
@@ -49,6 +50,10 @@
4950
load_checkpoint,
5051
validate_checkpoint_config,
5152
)
53+
from runtime_logging import ( # type: ignore
54+
RuntimeLoggingConfig,
55+
configure_runtime_logging,
56+
)
5257

5358

5459
def _run_verification(
@@ -850,6 +855,41 @@ def main():
850855
choices=["auto", "textual", "none"],
851856
help="Wave runtime UI mode. 'auto' uses Textual only on an interactive TTY (default: auto).",
852857
)
858+
parser.add_argument(
859+
"--log-file",
860+
type=str,
861+
default=None,
862+
metavar="PATH",
863+
help="Write high-volume runtime events to this file.",
864+
)
865+
parser.add_argument(
866+
"--log-level",
867+
type=str,
868+
default="INFO",
869+
choices=["DEBUG", "INFO", "WARNING", "ERROR"],
870+
help="Runtime log level (default: INFO).",
871+
)
872+
parser.add_argument(
873+
"--log-format",
874+
type=str,
875+
default="text",
876+
choices=["text", "json"],
877+
help="Runtime log encoding (default: text).",
878+
)
879+
parser.add_argument(
880+
"--log-max-mb",
881+
type=int,
882+
default=0,
883+
metavar="N",
884+
help="Rotate runtime log after N MB (0 disables rotation).",
885+
)
886+
parser.add_argument(
887+
"--log-backups",
888+
type=int,
889+
default=3,
890+
metavar="N",
891+
help="Number of rotated runtime log files to keep (default: 3).",
892+
)
853893

854894
args = parser.parse_args()
855895

@@ -861,6 +901,17 @@ def main():
861901
args, "llvm_mca_path_deprecated", None
862902
)
863903

904+
# Configure file-backed runtime logging (optional).
905+
log_cfg = RuntimeLoggingConfig(
906+
enabled=args.log_file is not None,
907+
level=args.log_level,
908+
file_path=args.log_file,
909+
format=args.log_format,
910+
max_bytes=max(0, args.log_max_mb) * 1024 * 1024,
911+
backup_count=max(0, args.log_backups),
912+
)
913+
configure_runtime_logging(log_cfg)
914+
864915
# --list-cpus mode: print supported architectures and exit
865916
if args.list_cpus:
866917
from util.uops_parser import (
@@ -968,6 +1019,11 @@ def main():
9681019
max_waves=args.max_waves,
9691020
top_k=args.top_k or 10,
9701021
runtime_ui=args.runtime_ui,
1022+
log_file=args.log_file,
1023+
log_level=args.log_level,
1024+
log_format=args.log_format,
1025+
log_max_mb=args.log_max_mb,
1026+
log_backups=args.log_backups,
9711027
)
9721028

9731029
engine = WaveEngine(wave_config)
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
"""Runtime logging utilities for wave synthesis/scoring observability."""
2+
3+
from __future__ import annotations
4+
5+
import atexit
6+
import json
7+
import logging
8+
from dataclasses import dataclass
9+
from datetime import UTC, datetime
10+
from logging.handlers import QueueHandler, QueueListener, RotatingFileHandler
11+
from queue import SimpleQueue
12+
from collections.abc import Callable
13+
from typing import Any
14+
15+
# Log encodings accepted by _make_formatter (and the CLI --log-format flag).
_VALID_FORMATS = {"text", "json"}
16+
17+
18+
@dataclass(slots=True)
class RuntimeLoggingConfig:
    """Configuration for runtime logging sinks and formatting."""

    # Master switch: when False, configure_runtime_logging leaves the logger
    # unconfigured and returns a no-op shutdown callable.
    enabled: bool = False
    # Textual level name ("DEBUG", "INFO", ...); parsed by parse_log_level.
    level: str = "INFO"
    # Destination file for events; None disables the file sink.
    file_path: str | None = None
    # Event encoding: "text" or "json" (must be in _VALID_FORMATS).
    format: str = "text"
    # Rotate the file after this many bytes; 0 disables rotation and uses a
    # plain FileHandler instead of RotatingFileHandler.
    max_bytes: int = 0
    # Number of rotated files to keep (RotatingFileHandler backupCount).
    backup_count: int = 3
    # Also mirror events to a StreamHandler (stderr by default).
    console: bool = False
    # Logger namespace the wave runtime emits events under.
    logger_name: str = "vxsort.runtime"
30+
31+
32+
def parse_log_level(level: str | int) -> int:
33+
"""Return numeric logging level from string/int input."""
34+
if isinstance(level, int):
35+
return level
36+
37+
normalized = level.strip().upper()
38+
mapping = {
39+
"CRITICAL": logging.CRITICAL,
40+
"ERROR": logging.ERROR,
41+
"WARNING": logging.WARNING,
42+
"WARN": logging.WARNING,
43+
"INFO": logging.INFO,
44+
"DEBUG": logging.DEBUG,
45+
}
46+
if normalized not in mapping:
47+
msg = f"Unknown log level: {level!r}"
48+
raise ValueError(msg)
49+
return mapping[normalized]
50+
51+
52+
class EventTextFormatter(logging.Formatter):
53+
"""Text formatter that prints event + key/value fields."""
54+
55+
def format(self, record: logging.LogRecord) -> str: # noqa: D401
56+
ts = datetime.fromtimestamp(record.created, tz=UTC).isoformat()
57+
event = getattr(record, "event_name", record.getMessage())
58+
fields = getattr(record, "event_fields", {})
59+
if fields:
60+
rendered_fields = " ".join(
61+
f"{key}={value!r}" for key, value in fields.items()
62+
)
63+
return f"{ts} [{record.levelname}] {event} | {rendered_fields}"
64+
return f"{ts} [{record.levelname}] {event}"
65+
66+
67+
class EventJsonFormatter(logging.Formatter):
68+
"""JSONL formatter for high-volume machine-readable logs."""
69+
70+
def format(self, record: logging.LogRecord) -> str: # noqa: D401
71+
payload: dict[str, Any] = {
72+
"ts": datetime.fromtimestamp(record.created, tz=UTC).isoformat(),
73+
"level": record.levelname,
74+
"logger": record.name,
75+
"event": getattr(record, "event_name", record.getMessage()),
76+
}
77+
payload.update(getattr(record, "event_fields", {}))
78+
79+
if record.exc_info:
80+
payload["exception"] = self.formatException(record.exc_info)
81+
82+
return json.dumps(payload, default=str)
83+
84+
85+
def _make_formatter(fmt: str) -> logging.Formatter:
    """Return the formatter instance for a validated format name.

    Raises:
        ValueError: if *fmt* is not one of _VALID_FORMATS.
    """
    if fmt not in _VALID_FORMATS:
        msg = f"Unsupported log format: {fmt!r}. Expected one of {_VALID_FORMATS}"
        raise ValueError(msg)
    return EventJsonFormatter() if fmt == "json" else EventTextFormatter()
92+
93+
94+
def configure_runtime_logging(
    config: RuntimeLoggingConfig,
) -> tuple[logging.Logger, Callable[[], None]]:
    """Configure queue-backed runtime logging and return (logger, shutdown).

    Events are handed to a QueueHandler so the emitting (hot) thread never
    blocks on file I/O; a QueueListener thread drains the queue into the
    real file/console handlers. The returned shutdown callable stops the
    listener, flushes/closes handlers, and detaches the logger; it is also
    registered with atexit as a safety net.

    When ``config.enabled`` is False, or no sink ends up configured, the
    logger is reset to its default (propagating, NOTSET) state and a no-op
    shutdown is returned.
    """
    logger = logging.getLogger(config.logger_name)

    # Drop any handlers left over from a previous configuration so repeated
    # calls don't duplicate output.
    for handler in list(logger.handlers):
        logger.removeHandler(handler)

    if not config.enabled:
        # Restore default logger behavior and hand back a no-op shutdown.
        logger.setLevel(logging.NOTSET)
        logger.propagate = True

        def _noop_shutdown() -> None:
            return

        return logger, _noop_shutdown

    level = parse_log_level(config.level)
    formatter = _make_formatter(config.format)

    # Real sinks the background listener will fan events out to.
    target_handlers: list[logging.Handler] = []

    if config.file_path:
        # Rotation is opt-in: max_bytes > 0 selects RotatingFileHandler,
        # otherwise a plain append-mode FileHandler is used.
        if config.max_bytes and config.max_bytes > 0:
            file_handler: logging.Handler = RotatingFileHandler(
                config.file_path,
                maxBytes=config.max_bytes,
                backupCount=config.backup_count,
                encoding="utf-8",
            )
        else:
            file_handler = logging.FileHandler(config.file_path, encoding="utf-8")
        file_handler.setLevel(level)
        file_handler.setFormatter(formatter)
        target_handlers.append(file_handler)

    if config.console:
        console_handler = logging.StreamHandler()
        console_handler.setLevel(level)
        console_handler.setFormatter(formatter)
        target_handlers.append(console_handler)

    if not target_handlers:
        # Enabled but no sink configured (no file path, console off):
        # behave exactly like the disabled case.
        logger.setLevel(logging.NOTSET)
        logger.propagate = True

        def _noop_shutdown() -> None:
            return

        return logger, _noop_shutdown

    # Decouple emitters from I/O: the logger only enqueues; the listener
    # thread performs the actual writes.
    queue_obj: SimpleQueue[Any] = SimpleQueue()
    queue_handler = QueueHandler(queue_obj)
    queue_handler.setLevel(level)
    logger.addHandler(queue_handler)
    logger.setLevel(level)
    # Keep high-volume runtime events out of the root logger's handlers.
    logger.propagate = False

    listener = QueueListener(queue_obj, *target_handlers, respect_handler_level=True)
    listener.start()

    def _shutdown() -> None:
        # Best-effort teardown: each step is wrapped so one failing handler
        # cannot prevent the others from flushing/closing.
        try:
            listener.stop()
        except Exception:
            pass
        for handler in target_handlers:
            try:
                handler.flush()
            except Exception:
                pass
            try:
                handler.close()
            except Exception:
                pass
        # Detach the queue handler and restore default logger state.
        for handler in list(logger.handlers):
            logger.removeHandler(handler)
        logger.setLevel(logging.NOTSET)
        logger.propagate = True

    # Safety net for callers that never invoke the returned shutdown.
    atexit.register(_shutdown)
    return logger, _shutdown
177+
178+
179+
def log_event(
180+
logger: logging.Logger,
181+
level: int | str,
182+
event: str,
183+
**fields: Any,
184+
) -> None:
185+
"""Emit one structured runtime event."""
186+
level_no = parse_log_level(level) if isinstance(level, str) else level
187+
if not logger.isEnabledFor(level_no):
188+
return
189+
190+
logger.log(
191+
level_no,
192+
event,
193+
extra={"event_name": event, "event_fields": fields},
194+
)

0 commit comments

Comments
 (0)