diff --git a/.gitignore b/.gitignore index 4ea98752..ac5c4452 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,12 @@ user/libbboeos/test_program/hello # compiled userland (user/libbboeos, ports/doom). user/libbboeos/include/syscalls.h +# Generated by tools/generate_libbboeos_stubs.py from +# kernel/include/constants.asm. Tiny `jmp [FUNCTION_<NAME>_PTR]` +# thunks linked into clang programs before libbboeos.a so each +# libbboeos export resolves to a 6-byte stub instead of a full body. +user/libbboeos/libbboeos_stubs.S + # tools/record_demo.py transient output _demo_capture/ diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 1b9688b7..96ff7ab6 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -11,6 +11,18 @@ time. ## [Unreleased](https://github.com/bboe/BBoeOS/compare/0.11.0...main) +- **libbboeos: Phase 4 — link clang programs against `libbboeos_stubs.o` thunks + instead of pulling the full bodies out of `libbboeos.a`.** Adds + `tools/generate_libbboeos_stubs.py` (reads `FUNCTION_<NAME>_PTR` entries from + `kernel/include/constants.asm`, emits a one-`jmp [absolute]`-per-export `.S` + file) and a Makefile rule that compiles it to + `user/libbboeos/libbboeos_stubs.o`. `ports/doom/build.py` and + `tests/test_libbboeos_qemu.py` now link `libbboeos_stubs.o` BEFORE + `libbboeos.a`, so ld resolves every libbboeos export to the 6-byte stub and + the corresponding archive member (`string.o`) is never pulled in. Saves ~1.7 + KB per clang-built binary (hello drops 12,682 → 10,922; doom drops 659,942 → + 658,182). The win scales as more libbboeos C sources move behind the pointer + table. 
- **libbboeos: export the remaining `<string.h>` surface.** Add `memchr`, `memcmp`, `memcpy`, `memmove`, `memset`, `strcasecmp`, `strcat`, `strchr`, `strcpy`, `strdup`, `strerror`, `strlen`, `strncasecmp`, `strncat`, `strncmp`, diff --git a/ports/doom/build.py b/ports/doom/build.py index 66b2bebf..04ca45f8 100755 --- a/ports/doom/build.py +++ b/ports/doom/build.py @@ -234,6 +234,11 @@ def _link(*, objects: list[Path]) -> None: str(ELF_OUTPUT), str(LIBBBOEOS / "_start.o"), *[str(obj) for obj in objects], + # libbboeos_stubs.o MUST come before libbboeos.a so each + # FUNCTION_*_PTR export is defined by a 6-byte `jmp [absolute]` + # thunk into the shared blob; otherwise ld would pull the full + # string.c (etc.) bodies out of the archive into every program. + str(LIBBBOEOS / "libbboeos_stubs.o"), str(LIBBBOEOS / "libbboeos.a"), ]) subprocess.check_call([ diff --git a/tests/test_libbboeos_qemu.py b/tests/test_libbboeos_qemu.py index e081d80a..9cca0475 100755 --- a/tests/test_libbboeos_qemu.py +++ b/tests/test_libbboeos_qemu.py @@ -30,12 +30,12 @@ # each pattern in the full output independently, so this list is sorted # alphabetically rather than in the program's emit order. 
#!/usr/bin/env python3
"""Generate user/libbboeos/libbboeos_stubs.S from kernel/include/constants.asm.

Emits a tiny `jmp [FUNCTION_<NAME>_PTR]` thunk per libbboeos C export.
Clang-built userland programs (ports/doom, tests/test_libbboeos_qemu.py)
link this object file BEFORE libbboeos.a so the archive's full bodies
never get pulled in — every call dispatches through the shared
libbboeos blob's pointer table instead of being statically duplicated
per program.

Rule: emit a stub for every FUNCTION_<NAME>_PTR entry whose un-suffixed
FUNCTION_<NAME> counterpart is *absent*. The legacy 13-entry block at
the top of FUNCTION_POINTER_TABLE (FUNCTION_DIE_PTR, ...) has both
FUNCTION_DIE and FUNCTION_DIE_PTR — those resolve to vdso.asm's
shared_* helpers and aren't libbboeos exports, so they're skipped.

Re-run is idempotent — the script writes the file only if the contents
differ, so make / build.py can call it unconditionally without forcing
recompiles.
"""

from __future__ import annotations

import ast
import operator
import re
import sys
from pathlib import Path

# `%assign NAME VALUE [; comment]`. The named groups are read back via
# match.group("name") / match.group("value") in _collect_function_constants.
ASSIGN = re.compile(r"^\s*%assign\s+(?P<name>\w+)\s+(?P<value>.+?)\s*(?:;.*)?$")

REPO = Path(__file__).resolve().parent.parent
DESTINATION = REPO / "user" / "libbboeos" / "libbboeos_stubs.S"
SOURCE = REPO / "kernel" / "include" / "constants.asm"

# Arithmetic accepted in a `%assign` right-hand side. NASM's `/` is integer
# division, so it maps to floor division rather than Python's float `/`.
_BINARY_OPERATORS = {
    ast.Add: operator.add,
    ast.Div: operator.floordiv,
    ast.Mult: operator.mul,
    ast.Sub: operator.sub,
}
# NASM hex literals with an `h` suffix must start with a decimal digit;
# `Ah` is an identifier, `0Ah` is the number ten.
_HEX_SUFFIX = re.compile(r"\b([0-9][0-9A-Fa-f]*)[hH]\b")
# Whole-expression whitelist: names/numbers, whitespace, + - * / and parens.
_SUPPORTED_EXPRESSION = re.compile(r"[\w\s+\-*/()]*")
_UNARY_OPERATORS = {
    ast.UAdd: operator.pos,
    ast.USub: operator.neg,
}


def _collect_function_constants() -> dict[str, int]:
    """Return {NAME: VALUE} for every `%assign FUNCTION_<...>` in constants.asm.

    Values resolve to absolute integers. Handles forward references by
    looping until a pass adds nothing new. NASM hex literals (`0x...` or
    trailing `h`) and decimal integers are accepted.

    FUNCTION_* constants whose value cannot be evaluated are left out of
    the result and reported on stderr, because a missing entry means a
    missing stub (or a legacy entry that is no longer recognised as one).
    """
    raw: dict[str, str] = {}
    for line in SOURCE.read_text(encoding="utf-8").splitlines():
        match = ASSIGN.match(line)
        if match is not None:
            # A later %assign of the same name overrides an earlier one.
            raw[match.group("name")] = match.group("value").strip()

    resolved: dict[str, int] = {}
    progress = True
    while progress:
        # Fixed-point iteration: each pass resolves the names whose
        # dependencies were resolved by earlier passes.
        progress = False
        for name, value in raw.items():
            if name in resolved:
                continue
            integer = _try_evaluate(environment=resolved, value=value)
            if integer is None:
                continue
            resolved[name] = integer
            progress = True

    unresolved = sorted(
        name for name in raw if name.startswith("FUNCTION_") and name not in resolved
    )
    if unresolved:
        print(
            f"warning: could not evaluate {len(unresolved)} FUNCTION_* constant(s): "
            f"{', '.join(unresolved)}",
            file=sys.stderr,
        )
    return {name: value for name, value in resolved.items() if name.startswith("FUNCTION_")}


def _evaluate_node(*, environment: dict[str, int], node: ast.AST) -> int:
    """Evaluate one node of a parsed `%assign` expression to an integer.

    Raises KeyError for a name missing from *environment* and ValueError
    for any construct other than integer literals, names, unary +/- and
    binary + - * /.
    """
    if isinstance(node, ast.Constant):
        # `type(...) is int` deliberately rejects bool, float and str.
        if type(node.value) is int:
            return node.value
    elif isinstance(node, ast.Name):
        return environment[node.id]
    elif isinstance(node, ast.BinOp):
        handler = _BINARY_OPERATORS.get(type(node.op))
        if handler is not None:
            return handler(
                _evaluate_node(environment=environment, node=node.left),
                _evaluate_node(environment=environment, node=node.right),
            )
    elif isinstance(node, ast.UnaryOp):
        handler = _UNARY_OPERATORS.get(type(node.op))
        if handler is not None:
            return handler(_evaluate_node(environment=environment, node=node.operand))
    raise ValueError(f"unsupported expression element: {type(node).__name__}")


def _render_stubs(*, exports: list[tuple[str, int]]) -> str:
    """Render libbboeos_stubs.S for the given (export_name, pointer_address) pairs.

    Each export name is lower-cased to form the stub's symbol; the address
    is emitted as an 8-digit hex absolute operand of an indirect `jmp`.
    The returned text always ends with a newline.
    """
    lines = [
        "/* user/libbboeos/libbboeos_stubs.S — auto-generated. DO NOT EDIT.",
        " *",
        " * Regenerate with `python3 tools/generate_libbboeos_stubs.py`.",
        " * Each stub is a 6-byte `jmp [FUNCTION_<NAME>_PTR]` thunk into the",
        " * shared libbboeos blob. Clang programs link this file BEFORE",
        " * libbboeos.a so ld resolves each export to the stub and never",
        " * pulls the full body out of the archive — that's the Phase 4",
        " * binary-size win (per-program string.c bodies retire to the",
        " * shared blob).",
        " *",
        " * Source of truth: FUNCTION_<NAME>_PTR offsets in",
        " * kernel/include/constants.asm. Sorted alphabetically to match.",
        " */",
        "",
        " .intel_syntax noprefix",
        ' .section .text.libbboeos_stubs, "ax", @progbits',
        "",
    ]
    for name, address in exports:
        symbol = name.lower()
        lines.extend([
            f" .globl {symbol}",
            f" .type {symbol}, @function",
            f"{symbol}:",
            f" jmp [0x{address:08x}] /* FUNCTION_{name}_PTR */",
            f" .size {symbol}, . - {symbol}",
            "",
        ])
    return "\n".join(lines)


def _select_exports(constants: dict[str, int]) -> list[tuple[str, int]]:
    """Return sorted (base_name, address) pairs for the libbboeos exports.

    An export is a FUNCTION_<NAME>_PTR constant with no un-suffixed
    FUNCTION_<NAME> sibling in *constants*. A constant named exactly
    FUNCTION_PTR has an empty <NAME> and is skipped, since it would
    otherwise produce a stub with an empty symbol.
    """
    prefix, suffix = "FUNCTION_", "_PTR"
    exports: list[tuple[str, int]] = []
    for full_name, address in constants.items():
        if not (full_name.startswith(prefix) and full_name.endswith(suffix)):
            continue
        base = full_name[len(prefix) : -len(suffix)]
        if not base or f"{prefix}{base}" in constants:
            continue
        exports.append((base, address))
    exports.sort()
    return exports


def _try_evaluate(*, environment: dict[str, int], value: str) -> int | None:
    """Try to evaluate a NASM `%assign` RHS using already-resolved names.

    Returns None if the expression references an unresolved name, uses
    anything beyond integers, names, parentheses and + - * /, or divides
    by zero. Unsupported characters reject the whole expression instead
    of being dropped, so `~X` or `$ + 4` can never evaluate to a wrong
    value. The expression is parsed with `ast` and walked by hand; nothing
    is ever passed to eval().
    """
    normalized = _HEX_SUFFIX.sub(r"0x\1", value)
    if _SUPPORTED_EXPRESSION.fullmatch(normalized) is None:
        return None
    try:
        # strip(): compile() in "eval" mode rejects leading indentation.
        tree = ast.parse(normalized.strip(), mode="eval")
        return _evaluate_node(environment=environment, node=tree.body)
    except (KeyError, RecursionError, SyntaxError, ValueError, ZeroDivisionError):
        return None


def main() -> int:
    """Regenerate libbboeos_stubs.S from constants.asm; idempotent.

    The file is rewritten only when its contents would change, so its
    mtime is left alone on a no-op run. Always returns 0.
    """
    exports = _select_exports(_collect_function_constants())
    new = _render_stubs(exports=exports)
    # Explicit UTF-8: the generated header contains non-ASCII characters,
    # so the bytes on disk must not depend on the locale encoding.
    if DESTINATION.exists() and DESTINATION.read_text(encoding="utf-8") == new:
        return 0
    DESTINATION.write_text(new, encoding="utf-8")
    print(f"wrote {DESTINATION.relative_to(REPO)} ({len(exports)} stubs)", file=sys.stderr)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())