diff --git a/cc/codegen/x86/builtins.py b/cc/codegen/x86/builtins.py index 1828c7e7..790a0aff 100644 --- a/cc/codegen/x86/builtins.py +++ b/cc/codegen/x86/builtins.py @@ -523,10 +523,14 @@ def builtin_kernel_outb(self, arguments: list[Node], /) -> None: """Generate code for kernel_outb(port, value). Writes one byte to an I/O port. Constant ``value`` compiles to ``mov dx, <port>; mov al, <imm>; - out dx, al``. Non-constant ``value`` evaluates first into AX, - is pushed across the port-evaluation, then popped — the same - save-around-eval shape :meth:`builtin_far_write8` uses. - Kernel-only (see :meth:`builtin_kernel_inb` for the rationale). + out dx, al``. When ``value`` is a runtime expression but + ``port`` is an ``Int`` literal, the value evaluates into AX and + the port lowers to a single ``mov dx, <imm>`` that doesn't + touch AX — no save-around-eval needed. The general case + (both ``port`` and ``value`` runtime) keeps the push/pop guard + around the port evaluation, the same save-around-eval shape + :meth:`builtin_far_write8` uses. Kernel-only (see + :meth:`builtin_kernel_inb` for the rationale). """ if self.target_mode != "kernel": message = "kernel_outb() is kernel-only; not available in --target user" @@ -536,6 +540,9 @@ def builtin_kernel_outb(self, arguments: list[Node], /) -> None: if isinstance(value_arg, Int): self.emit_register_from_argument(argument=port_arg, register=self.target.dx_register) self.emit(f" mov al, {value_arg.value & 0xFF}") + elif isinstance(port_arg, Int): + self.emit_register_from_argument(argument=value_arg, register=self.target.acc) + self.emit(f" mov {self.target.dx_register}, {port_arg.value & 0xFFFF}") else: self.emit_register_from_argument(argument=value_arg, register=self.target.acc) self.emit(f" push {self.target.acc}") @@ -569,8 +576,10 @@ def builtin_kernel_outw(self, arguments: list[Node], /) -> None: """Generate code for kernel_outw(port, value). Writes one 16-bit word to an I/O port. 
Constant ``value`` compiles to ``mov dx, <port>; mov ax, <imm>; - out dx, ax``. Non-constant ``value`` uses the same push/pop guard - as :meth:`builtin_kernel_outb`. Kernel-only. + out dx, ax``. When ``value`` is a runtime expression but + ``port`` is an ``Int`` literal, the push/pop guard is elided + (same rationale as :meth:`builtin_kernel_outb`). General + case keeps the save-around-eval guard. Kernel-only. """ if self.target_mode != "kernel": message = "kernel_outw() is kernel-only; not available in --target user" @@ -580,6 +589,9 @@ def builtin_kernel_outw(self, arguments: list[Node], /) -> None: if isinstance(value_arg, Int): self.emit_register_from_argument(argument=port_arg, register=self.target.dx_register) self.emit(f" mov {self.target.acc}, {value_arg.value & 0xFFFF}") + elif isinstance(port_arg, Int): + self.emit_register_from_argument(argument=value_arg, register=self.target.acc) + self.emit(f" mov {self.target.dx_register}, {port_arg.value & 0xFFFF}") else: self.emit_register_from_argument(argument=value_arg, register=self.target.acc) self.emit(f" push {self.target.acc}") diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 8b90e6ca..23537104 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -11,18 +11,28 @@ time. ## [Unreleased](https://github.com/bboe/BBoeOS/compare/0.11.0...main) +- **cc.py: skip save-around-eval push/pop when ``kernel_outb`` / ``kernel_outw`` + port is a literal.** The general non-const-value path was unconditionally + emitting ``push eax; eval port → DX; pop eax`` to guard the value across port + evaluation. When the port is an ``Int`` literal, the port lowering is a + single ``mov dx, <imm>`` that doesn't touch EAX — so the save is dead. Add an + ``isinstance(port_arg, Int)`` branch that elides the push/pop in that case. 
+ Almost every kernel ``kernel_outb`` / ``kernel_outw`` site uses a hex-literal + port (``0x21``, ``0x3F6``, etc.), so this fires kernel-wide: **1,980-byte kasm + reduction** on top of the byte-imm fold from the previous entry. + - **Wire user/libbboeos clang exports into the shared blob.** `user/libbboeos/libbboeos.a` (clang-compiled) now links into `build/libbboeos` alongside the asm helpers via the existing ld pipeline; `-ffunction-sections` + `--gc-sections` drop everything the pointer table doesn't reference, so the - blob pays only for what it actually exports. First entry: `strcmp`, with a - new `FUNCTION_STRCMP_PTR` constant at `FUNCTION_POINTER_TABLE + 52`. - Cc.py-side wiring (extern-call fallback that emits `call - [FUNCTION_<NAME>_PTR]` for unknown names) lands in a follow-up — this PR is - the build-pipeline plumbing. `VDSO_SIGRETURN_OFFSET` moves from `0x460` → - `0xFE0` so the helper region can grow past 1 KB without colliding with the - sigreturn trampoline; sigreturn now lives near the end of page 0 (past the - pointer table at `0x800..0x83C`). + blob pays only for what it actually exports. First entry: `strcmp`, with a + new `FUNCTION_STRCMP_PTR` constant at `FUNCTION_POINTER_TABLE + 52`. + Cc.py-side wiring (extern-call fallback that emits `call + [FUNCTION_<NAME>_PTR]` for unknown names) lands in a follow-up — this PR is + the build-pipeline plumbing. `VDSO_SIGRETURN_OFFSET` moves from `0x460` → + `0xFE0` so the helper region can grow past 1 KB without colliding with the + sigreturn trampoline; sigreturn now lives near the end of page 0 (past the + pointer table at `0x800..0x83C`). 
- **cc.py: fold byte-immediate store + movzx reload through local.** When a one-shot struct literal local is read via ``*(uint8_t *)&local`` — the driver port-I/O idiom — the compiler was emitting ``mov byte [ebp-N], diff --git a/tests/unit/test_cc_codegen.py b/tests/unit/test_cc_codegen.py index e7bea1f4..76d24a46 100644 --- a/tests/unit/test_cc_codegen.py +++ b/tests/unit/test_cc_codegen.py @@ -1338,6 +1338,26 @@ def test_kernel_no_program_end() -> None: assert "_program_end:" not in asm, f"'_program_end:' found in kernel output\n{asm}" +def test_kernel_outb_constant_port_runtime_value_no_push_pop() -> None: + """``kernel_outb(<const>, <runtime>)`` skips the save-around-eval push/pop. + + The push/pop in the general path guards the value (held in AX) + across the port evaluation, since port eval may clobber AX. When + port is an ``Int`` literal, the port lowering is a single + ``mov dx, <imm>`` that doesn't touch AX — so the save is dead and + the codegen elides it. + """ + asm = _kernel(""" + uint8_t status() { return 5; } + void send_status() { + kernel_outb(0x20, status()); + } + """) + assert "out dx, al" in asm, f"Expected 'out dx, al' in:\n{asm}" + assert "push ax" not in asm and "push eax" not in asm, f"Const-port outb should not push the accumulator:\n{asm}" + assert "pop ax" not in asm and "pop eax" not in asm, f"Const-port outb should not pop the accumulator:\n{asm}" + + def test_kernel_outb_constant_value_short_form() -> None: """``kernel_outb(port, const)`` compiles to ``mov al, <imm>`` (no AX push/pop).""" asm = _kernel(""" @@ -1378,6 +1398,23 @@ def test_kernel_outsw_emits_rep_outsw() -> None: assert " rep outsw" in asm, f"expected rep outsw:\n{asm}" +def test_kernel_outw_constant_port_runtime_value_no_push_pop() -> None: + """``kernel_outw(<const>, <runtime>)`` skips the save-around-eval push/pop. + + Same elision as :func:`test_kernel_outb_constant_port_runtime_value_no_push_pop`, + just for the 16-bit out variant. 
+ """ + asm = _kernel(""" + int status() { return 5; } + void send_status() { + kernel_outw(0x300, status()); + } + """) + assert "out dx, ax" in asm, f"Expected 'out dx, ax' in:\n{asm}" + assert "push ax" not in asm and "push eax" not in asm, f"Const-port outw should not push the accumulator:\n{asm}" + assert "pop ax" not in asm and "pop eax" not in asm, f"Const-port outw should not pop the accumulator:\n{asm}" + + def test_kernel_outw_constant_value_short_form() -> None: """``kernel_outw(port, const)`` compiles to a constant ``mov ax, ...`` then ``out dx, ax``.""" asm = _kernel("""