Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ user/libbboeos/test_program/hello
# compiled userland (user/libbboeos, ports/doom).
user/libbboeos/include/syscalls.h

# Generated by tools/generate_libbboeos_stubs.py from
# kernel/include/constants.asm. Tiny `jmp [FUNCTION_<NAME>_PTR]`
# thunks linked into clang programs before libbboeos.a so each
# libbboeos export resolves to a 6-byte stub instead of a full body.
user/libbboeos/libbboeos_stubs.S

# tools/record_demo.py transient output
_demo_capture/

Expand Down
12 changes: 12 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,18 @@ time.

## [Unreleased](https://github.com/bboe/BBoeOS/compare/0.11.0...main)

- **libbboeos: Phase 4 — link clang programs against `libbboeos_stubs.o` thunks
instead of pulling the full bodies out of `libbboeos.a`.** Adds
`tools/generate_libbboeos_stubs.py` (reads `FUNCTION_<NAME>_PTR` entries from
`kernel/include/constants.asm`, emits a one-`jmp [absolute]`-per-export `.S`
file) and a Makefile rule that compiles it to
`user/libbboeos/libbboeos_stubs.o`. `ports/doom/build.py` and
`tests/test_libbboeos_qemu.py` now link `libbboeos_stubs.o` BEFORE
`libbboeos.a`, so ld resolves every libbboeos export to the 6-byte stub and
the corresponding archive member (`string.o`) is never pulled in. Saves ~1.7
KB per clang-built binary (hello drops 12,682 → 10,922; doom drops 659,942 →
658,182). The win scales as more libbboeos C sources move behind the pointer
table.
- **libbboeos: export the remaining `<string.h>` surface.** Add `memchr`,
`memcmp`, `memcpy`, `memmove`, `memset`, `strcasecmp`, `strcat`, `strchr`,
`strcpy`, `strdup`, `strerror`, `strlen`, `strncasecmp`, `strncat`, `strncmp`,
Expand Down
5 changes: 5 additions & 0 deletions ports/doom/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,11 @@ def _link(*, objects: list[Path]) -> None:
str(ELF_OUTPUT),
str(LIBBBOEOS / "_start.o"),
*[str(obj) for obj in objects],
# libbboeos_stubs.o MUST come before libbboeos.a so each
# FUNCTION_*_PTR export is defined by a 6-byte `jmp [absolute]`
# thunk into the shared blob; otherwise ld would pull the full
# string.c (etc.) bodies out of the archive into every program.
str(LIBBBOEOS / "libbboeos_stubs.o"),
str(LIBBBOEOS / "libbboeos.a"),
])
subprocess.check_call([
Expand Down
13 changes: 7 additions & 6 deletions tests/test_libbboeos_qemu.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@
# each pattern in the full output independently, so this list is sorted
# alphabetically rather than in the program's emit order.
EXPECTED = [
r"\[bboeos libc\] -1 4000000000 cafe ok",
r"\[bboeos libc\] dirent: \d+ entries, rewind ok",
r"\[bboeos libc\] done",
r"\[bboeos libc\] hello",
r"\[bboeos libc\] longjmp returned 42",
r"\[bboeos libc\] malloc-works",
r"\[libbboeos\] -1 4000000000 cafe ok",
r"\[libbboeos\] dirent: \d+ entries, rewind ok",
r"\[libbboeos\] done",
r"\[libbboeos\] hello",
r"\[libbboeos\] longjmp returned 42",
r"\[libbboeos\] malloc-works",
]


Expand Down Expand Up @@ -64,6 +64,7 @@ def _build_hello() -> None:
str(HELLO_BIN),
str(LIBBBOEOS / "_start.o"),
str(obj),
str(LIBBBOEOS / "libbboeos_stubs.o"),
str(LIBBBOEOS / "libbboeos.a"),
])

Expand Down
142 changes: 142 additions & 0 deletions tools/generate_libbboeos_stubs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
#!/usr/bin/env python3
"""Generate user/libbboeos/libbboeos_stubs.S from kernel/include/constants.asm.

Emits a tiny `jmp [FUNCTION_<NAME>_PTR]` thunk per libbboeos C export.
Clang-built userland programs (ports/doom, tests/test_libbboeos_qemu.py)
link this object file BEFORE libbboeos.a so the archive's full bodies
never get pulled in — every call dispatches through the shared
libbboeos blob's pointer table instead of being statically duplicated
per program.

Rule: emit a stub for every FUNCTION_<NAME>_PTR entry whose un-suffixed
FUNCTION_<NAME> counterpart is *absent*. The legacy 13-entry block at
the top of FUNCTION_POINTER_TABLE (FUNCTION_DIE_PTR, ...) has both
FUNCTION_DIE and FUNCTION_DIE_PTR — those resolve to vdso.asm's
shared_* helpers and aren't libbboeos exports, so they're skipped.

Re-run is idempotent — the script writes the file only if the contents
differ, so make / build.py can call it unconditionally without forcing
recompiles.
"""

from __future__ import annotations

import re
import sys
from pathlib import Path

# Matches a single NASM `%assign NAME VALUE` line. `name` captures the
# identifier and `value` captures the right-hand side; an optional trailing
# `; comment` and surrounding whitespace are left out of `value`.
ASSIGN = re.compile(r"^\s*%assign\s+(?P<name>\w+)\s+(?P<value>.+?)\s*(?:;.*)?$")

# Directory two levels above this script's resolved path (the checkout root,
# given that the script lives in tools/).
REPO = Path(__file__).resolve().parent.parent
# Generated assembly file that main() writes.
DESTINATION = REPO / "user" / "libbboeos" / "libbboeos_stubs.S"
# NASM constants file scanned for `%assign` lines.
SOURCE = REPO / "kernel" / "include" / "constants.asm"


def _collect_function_constants() -> dict[str, int]:
    """Return {NAME: VALUE} for every resolvable `%assign FUNCTION_<...>`.

    Every `%assign` line in SOURCE is collected (a later definition of the
    same name replaces the earlier expression), then the expressions are
    evaluated in repeated passes so that an entry may refer to a name that
    is defined further down the file. The loop stops once a full pass
    resolves nothing new.

    Entries whose expression never becomes evaluable are silently left out
    of the result. Only names starting with ``FUNCTION_`` are returned;
    other names are used solely to resolve references.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # locale of the machine running the build.
    expressions: dict[str, str] = {}
    for text in SOURCE.read_text(encoding="utf-8").splitlines():
        found = ASSIGN.match(text)
        if found is not None:
            expressions[found.group("name")] = found.group("value").strip()

    values: dict[str, int] = {}
    pending = list(expressions)
    while pending:
        still_pending: list[str] = []
        for symbol in pending:
            number = _try_evaluate(value=expressions[symbol], environment=values)
            if number is None:
                still_pending.append(symbol)
            else:
                # Visible immediately to later symbols in this same pass.
                values[symbol] = number
        if len(still_pending) == len(pending):
            # A whole pass made no progress: the rest cannot be resolved.
            break
        pending = still_pending

    return {
        symbol: number
        for symbol, number in values.items()
        if symbol.startswith("FUNCTION_")
    }


def _render_stubs(*, exports: list[tuple[str, int]]) -> str:
"""Render libbboeos_stubs.S for the given (export_name, pointer_address) pairs."""
lines = [
"/* user/libbboeos/libbboeos_stubs.S — auto-generated. DO NOT EDIT.",
" *",
" * Regenerate with `python3 tools/generate_libbboeos_stubs.py`.",
" * Each stub is a 6-byte `jmp [FUNCTION_<NAME>_PTR]` thunk into the",
" * shared libbboeos blob. Clang programs link this file BEFORE",
" * libbboeos.a so ld resolves each export to the stub and never",
" * pulls the full body out of the archive — that's the Phase 4",
" * binary-size win (per-program string.c bodies retire to the",
" * shared blob).",
" *",
" * Source of truth: FUNCTION_<NAME>_PTR offsets in",
" * kernel/include/constants.asm. Sorted alphabetically to match.",
" */",
"",
" .intel_syntax noprefix",
' .section .text.libbboeos_stubs, "ax", @progbits',
"",
]
for name, address in exports:
symbol = name.lower()
lines.extend([
f" .globl {symbol}",
f" .type {symbol}, @function",
f"{symbol}:",
f" jmp [0x{address:08x}] /* FUNCTION_{name}_PTR */",
f" .size {symbol}, . - {symbol}",
"",
])
return "\n".join(lines)


def _try_evaluate(*, environment: dict[str, int], value: str) -> int | None:
"""Try to evaluate a NASM `%assign` RHS using already-resolved names.

Returns None if any token references an unresolved name.
"""
normalized = re.sub(r"\b([0-9A-Fa-f]+)h\b", r"0x\1", value)
tokens = re.findall(r"\w+|[+\-*/()]", normalized)
expression_parts: list[str] = []
for token in tokens:
if re.fullmatch(r"\w+", token) and not re.fullmatch(r"(?:0x[0-9a-fA-F]+|[0-9]+)", token):
if token not in environment:
return None
expression_parts.append(str(environment[token]))
else:
expression_parts.append(token)
try:
return int(eval(" ".join(expression_parts), {"__builtins__": {}}, {}))
except (NameError, SyntaxError, TypeError, ValueError, ZeroDivisionError):
return None


def main() -> int:
    """Regenerate libbboeos_stubs.S from constants.asm; idempotent.

    A stub is emitted for each ``FUNCTION_<NAME>_PTR`` constant that has no
    matching ``FUNCTION_<NAME>`` constant. The file is rewritten only when
    its contents would change, and a one-line summary is printed to stderr
    when it is. Always returns 0.
    """
    prefix = "FUNCTION_"
    suffix = "_PTR"
    constants = _collect_function_constants()

    exports: list[tuple[str, int]] = []
    for full_name, address in constants.items():
        if not full_name.endswith(suffix):
            continue
        base = full_name[len(prefix) : -len(suffix)]
        if not base:
            # e.g. `FUNCTION_PTR`: nothing is left to name a symbol with.
            continue
        if f"{prefix}{base}" in constants:
            # Both forms exist, so this is not a libbboeos export.
            continue
        exports.append((base, address))
    exports.sort()

    new = _render_stubs(exports=exports)

    # The generated header contains a non-ASCII dash, so pin the encoding
    # rather than inheriting the locale's. A missing or undecodable
    # existing file simply counts as "different".
    try:
        current = DESTINATION.read_text(encoding="utf-8")
    except (FileNotFoundError, UnicodeDecodeError):
        current = None
    if current == new:
        return 0

    DESTINATION.write_text(new, encoding="utf-8")
    print(f"wrote {DESTINATION.relative_to(REPO)} ({len(exports)} stubs)", file=sys.stderr)
    return 0


if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())
26 changes: 21 additions & 5 deletions user/libbboeos/Makefile
Original file line number Diff line number Diff line change
@@ -1,8 +1,19 @@
# user/libbboeos/Makefile — builds libbboeos.a, the freestanding libbboeos shim
# for clang-built bboeos userland programs.
# user/libbboeos/Makefile — builds libbboeos.a (full freestanding shim,
# used to populate the shared libbboeos blob) and libbboeos_stubs.o
# (per-export `jmp [FUNCTION_<NAME>_PTR]` thunks; linked into clang
# programs BEFORE libbboeos.a so the full bodies stay in the blob
# instead of being duplicated into every binary).

.DEFAULT_GOAL := libbboeos.a
.PHONY: clean syscalls_h
.DEFAULT_GOAL := all
.PHONY: all clean stubs_s syscalls_h

all: libbboeos.a libbboeos_stubs.o

# libbboeos_stubs.S is auto-generated from kernel/include/constants.asm
# by tools/generate_libbboeos_stubs.py and gitignored. Idempotent —
# regenerate on every build so a renumbered FUNCTION_*_PTR propagates.
stubs_s:
@python3 ../../tools/generate_libbboeos_stubs.py

# include/syscalls.h is generated from kernel/include/constants.asm by
# tools/generate_syscalls_h.py and gitignored. Run it unconditionally
Expand Down Expand Up @@ -33,11 +44,16 @@ OBJS = $(C_SRCS:.c=.o) $(S_SRCS:.S=.o)
S_SRCS := _start.S setjmp.S

clean:
rm -f $(OBJS) libbboeos.a include/syscalls.h
rm -f $(OBJS) libbboeos.a libbboeos_stubs.o libbboeos_stubs.S include/syscalls.h

libbboeos.a: $(OBJS)
$(AR) rcs $@ $^

libbboeos_stubs.o: libbboeos_stubs.S
$(CC) $(ASFLAGS) -c $< -o $@

libbboeos_stubs.S: stubs_s

%.o: %.c | syscalls_h
$(CC) $(CFLAGS) -c $< -o $@

Expand Down