Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion archive/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ programs (`make_os.sh` passes `--bits 32`); every row in this table is now
| ls | 135 | 412 | 697 | +285 |
| mkdir | 123 | 142 | 163 | +21 |
| mv | 217 | 242 | 270 | +28 |
| ping | 1034 | 1230 | 1544 | +314 |
| ping | 1034 | 1230 | 1522 | +292 |
| uptime | 50 | 67 | 102 | +35 |

**arp (-99):** The three scratch arrays (`mac_buffer[6]`, `receive_buffer[128]`,
Expand Down
22 changes: 19 additions & 3 deletions cc/codegen/x86/emission.py
Original file line number Diff line number Diff line change
Expand Up @@ -1672,11 +1672,19 @@ def _lower_ir_instruction(self, instruction: ir.Instruction) -> None:
self.emit_store_local(expression=self._ir_value_to_ast(source), name=destination)
case ir.Call(destination=None, name=name, args=args):
call = Call(args=[self._ir_value_to_ast(a) for a in args], name=name)
self.generate_call(call, discard_return=True)
self._current_call_pinned_initialized = self._ir_call_pinned_initialized.get(id(instruction))
try:
self.generate_call(call, discard_return=True)
finally:
self._current_call_pinned_initialized = None
self.ax_clear()
case ir.Call(destination=destination, name=name, args=args):
call = Call(args=[self._ir_value_to_ast(a) for a in args], name=name)
self.emit_store_local(expression=call, name=destination)
self._current_call_pinned_initialized = self._ir_call_pinned_initialized.get(id(instruction))
try:
self.emit_store_local(expression=call, name=destination)
finally:
self._current_call_pinned_initialized = None
case ir.Index(destination=destination, base=base, index=index):
expression = Index(array=Var(name=base), index=self._ir_value_to_ast(index))
self.emit_store_local(expression=expression, name=destination)
Expand All @@ -1703,7 +1711,11 @@ def _lower_ir_instruction(self, instruction: ir.Instruction) -> None:
# ``if`` / ``while`` condition. ``generate_call`` sets
# up args (regparm / stack) the same way a direct call
# would.
self.generate_call(call_ast, discard_return=True)
self._current_call_pinned_initialized = self._ir_call_pinned_initialized.get(id(instruction))
try:
self.generate_call(call_ast, discard_return=True)
finally:
self._current_call_pinned_initialized = None
self.emit(f" {'jc' if when == 'set' else 'jnc'} {target}")
self.ax_clear()
case ir.Return(value=value):
Expand Down Expand Up @@ -1801,6 +1813,9 @@ def generate_function(self, function: Function | ir.Function, /) -> None:
self.current_carry_return = function.carry_return
self.current_function_is_main = name == "main"
self.current_function_is_naked = function.naked
self._current_function_parameter_names: tuple[str, ...] = tuple(parameter.name for parameter in parameters)
self._ir_call_pinned_initialized = {}
self._current_call_pinned_initialized = None
# Per-function user-label bookkeeping for the AST codegen path.
# The IR path validates inside ir.Builder; main() and other AST-
# path functions validate here after generate_body completes.
Expand Down Expand Up @@ -2138,6 +2153,7 @@ def _body_has_stack_arrays(stmts: list[Node]) -> bool:

if ir_body is not None:
# IR path: lower the flat instruction list directly.
self._ir_call_pinned_initialized = self._compute_pinned_initialized_per_call(ir_body)
self.lower_ir_body(ir_body)
else:
# Tail-call: if the last statement is a statement-level user-
Expand Down
205 changes: 177 additions & 28 deletions cc/codegen/x86/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from dataclasses import fields
from typing import ClassVar, NamedTuple

from cc import ir
from cc.ast_nodes import (
AddressOf,
ArrayDecl,
Expand Down Expand Up @@ -271,6 +272,16 @@ def __init__(
self.out_register_params: dict[str, dict[int, str]] = {}
self.param_in_register: dict[str, str] = {}
self.pinned_register: dict[str, str] = {}
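# Liveness data for pinned-register saves.
# ``_ir_call_pinned_initialized`` maps id(instruction) → frozenset
# of pinned-register names that are may-defined at that call site.
# It is rebuilt per function, before IR lowering, by
# _compute_pinned_initialized_per_call, which records entries only
# for builtin ``ir.Call`` instructions; user-function calls and
# ``ir.CarryBranch`` get no entry.
# ``_current_call_pinned_initialized`` holds the entry for the call
# currently being lowered. _pinned_registers_to_save consults it to
# skip saves for pinned locals whose value isn't yet meaningful
# (e.g., auto-pinned locals declared but not yet stored to). None
# means "no info available" — fall back to saving everything.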
self._ir_call_pinned_initialized: dict[int, frozenset[str]] = {}
self._current_call_pinned_initialized: frozenset[str] | None = None
self.register_aliased_globals: dict[str, str] = {} # name → register (e.g. "si")
self.store_target_register: str | None = None
# known_local_bytes and _last_byte_store support the Phase C
Expand Down Expand Up @@ -430,33 +441,6 @@ def _arg_pinned_sources(self, arg: Node, /) -> set[str]:
return self._arg_pinned_sources(arg.left) | self._arg_pinned_sources(arg.right)
return set()

def _collect_pinned_reads(self, node: Node, /) -> set[str]:
    """Collect the registers read by every ``Var`` reachable from *node*.

    Unlike :meth:`_arg_pinned_sources`, this walks the whole
    expression tree rather than a restricted set of shapes, so it
    also works for syscall-builtin arguments built from
    ``UnaryOperation``, ``AddressOf``, ``Index`` and similar nodes.

    Children are discovered through the ``__slots__`` declared on
    each node's own class: a slot holding a ``Node`` is visited, and
    a slot holding a ``list`` contributes its ``Node`` items.  A
    ``Var`` is a leaf; its name is looked up in ``pinned_register``
    first and ``param_in_register`` second, and only the first hit
    is recorded.

    Returns a set of register names (e.g. ``{"ebx", "edi"}``).
    """
    registers: set[str] = set()
    pending: list[Node] = [node]
    while pending:
        visiting = pending.pop()
        if not isinstance(visiting, Var):
            # Interior node: queue every child reachable through a slot.
            for attribute in getattr(type(visiting), "__slots__", ()):
                value = getattr(visiting, attribute, None)
                if isinstance(value, Node):
                    pending.append(value)
                elif isinstance(value, list):
                    pending.extend(element for element in value if isinstance(element, Node))
            continue
        # Leaf: pinned locals take priority over register-held parameters.
        for mapping in (self.pinned_register, self.param_in_register):
            if visiting.name in mapping:
                registers.add(mapping[visiting.name])
                break
    return registers

def _arithmetic_element_size(self, var_name: str, /) -> int:
"""Return the element stride for pointer/array arithmetic on *var_name*.

Expand Down Expand Up @@ -524,6 +508,133 @@ def _byte_index_direct(self, node: Index, /) -> str | None:
offset = node.index.value
return f"{const_base}+{offset}" if offset else const_base

def _collect_pinned_reads(self, node: Node, /) -> set[str]:
    """Collect the registers read by every ``Var`` reachable from *node*.

    Unlike :meth:`_arg_pinned_sources`, this walks the whole
    expression tree rather than a restricted set of shapes, so it
    also works for syscall-builtin arguments built from
    ``UnaryOperation``, ``AddressOf``, ``Index`` and similar nodes.

    Children are discovered through the ``__slots__`` declared on
    each node's own class: a slot holding a ``Node`` is visited, and
    a slot holding a ``list`` contributes its ``Node`` items.  A
    ``Var`` is a leaf; its name is looked up in ``pinned_register``
    first and ``param_in_register`` second, and only the first hit
    is recorded.

    Returns a set of register names (e.g. ``{"ebx", "edi"}``).
    """
    registers: set[str] = set()
    pending: list[Node] = [node]
    while pending:
        visiting = pending.pop()
        if not isinstance(visiting, Var):
            # Interior node: queue every child reachable through a slot.
            for attribute in getattr(type(visiting), "__slots__", ()):
                value = getattr(visiting, attribute, None)
                if isinstance(value, Node):
                    pending.append(value)
                elif isinstance(value, list):
                    pending.extend(element for element in value if isinstance(element, Node))
            continue
        # Leaf: pinned locals take priority over register-held parameters.
        for mapping in (self.pinned_register, self.param_in_register):
            if visiting.name in mapping:
                registers.add(mapping[visiting.name])
                break
    return registers

def _compute_pinned_initialized_per_call(self, ir_body: list, /) -> dict[int, frozenset[str]]:
"""Pre-pass: for each ir.Call / ir.CarryBranch, the may-defined pinned register set.

Auto-pinned locals are not initialized until the first store to
them. Saving a pinned register around a call before that
store preserves garbage — :meth:`_pinned_registers_to_save`
consults the map this method produces and skips the save when
the local can't yet hold a meaningful value.

Initial defined set: registers held by parameters (loaded into
their pin in the prologue) and locals declared with
``__attribute__((pinned_register(R)))`` whose initializer fired
as part of the declaration. Auto-pinned locals start
undefined.

Loop bodies are pre-merged: any store inside a loop region
(Label..back-Jump) is added to the defined set BEFORE the
first instruction of the loop, so subsequent iterations see
the value as live. Without this, calls inside the loop body
that appear before the store in source order would skip a
save that the second iteration actually needs.

Returns dict keyed by id(instruction). Empty / missing key
means "no live pin" so callers should treat absence as
``frozenset()`` — distinct from ``None`` which means "no
analysis was performed" (AST path, naked function, etc.).
"""
pinned_locals: dict[str, str] = dict(self.pinned_register)
if not pinned_locals:
return {}
initial: set[str] = set(self._prologue_initialized_pinned_registers())

def store_target(instruction: object) -> str | None:
if isinstance(instruction, (ir.BinaryOperation, ir.Copy, ir.Index)):
return instruction.destination
if isinstance(instruction, ir.Block):
# Block-wrapped AST escape hatch. A VarDecl with
# initialiser is a store to its name; ditto an
# ``unsigned long`` Assign that the IR builder routes
# through Block. Pinned-to-register locals can't be
# ``unsigned long`` (they wouldn't fit a single register),
# so only the VarDecl case can hit a pinned target —
# but we still extract Assign / MemberAssign destinations
# defensively in case future IR shapes wrap them.
node = instruction.node
if isinstance(node, Assign):
return node.name
if isinstance(node, VarDecl) and node.init is not None:
return node.name
# MemberAssign / IndexAssign / inline asm write through
# pointers or are opaque — they don't store to a single
# named local register. Skip.
return None
if isinstance(instruction, ir.Call) and instruction.destination is not None:
return instruction.destination
if isinstance(instruction, ir.IndexAssign):
# IndexAssign writes through a base pointer, not to the
# named base itself — leaves the base's register
# contents unchanged. Not a store to the pin.
return None
return None

label_positions: dict[str, int] = {}
for index, instruction in enumerate(ir_body):
if isinstance(instruction, ir.Label):
label_positions[instruction.name] = index
loop_ranges: list[tuple[int, int]] = []
for index, instruction in enumerate(ir_body):
if isinstance(instruction, ir.Jump):
target = label_positions.get(instruction.target)
if target is not None and target < index:
loop_ranges.append((target, index))
loop_stores: list[set[str]] = []
for start, end in loop_ranges:
stores: set[str] = set()
for k in range(start, end + 1):
target_name = store_target(ir_body[k])
if target_name in pinned_locals:
stores.add(pinned_locals[target_name])
loop_stores.append(stores)
result: dict[int, frozenset[str]] = {}
defined: set[str] = set(initial)
for index, instruction in enumerate(ir_body):
for loop_index, (start, _end) in enumerate(loop_ranges):
if start == index:
defined |= loop_stores[loop_index]
# Only record filter sets for builtin calls. User function
# calls go through a different save-set path that this
# analysis can't fully model — Block-wrapped statements
# (the IR escape hatch) and pointer-aliased pinned locals
# could be invalidated by the call in ways our pre-pass
# doesn't see. CarryBranch always wraps a user-function
# (``carry_return`` callee); same skip.
if isinstance(instruction, ir.Call) and instruction.name in self._builtin_clobbers:
result[id(instruction)] = frozenset(defined)
target_name = store_target(instruction)
if target_name in pinned_locals:
defined.add(pinned_locals[target_name])
return result

def _emit_bitfield_read(self, info: FieldInfo, /, *, addr: str) -> None:
"""Emit the load-shift-mask-extend sequence for a bitfield read.

Expand Down Expand Up @@ -2166,18 +2277,56 @@ def _pinned_registers_to_save(self, clobbers: frozenset[str], /) -> list[str]:
E-registers in protected mode and 16-bit aliases in real mode.
Normalise both sides through ``target.low_word`` so the
comparison still matches when the two halves disagree.

When :attr:`_current_call_pinned_initialized` is set (by the
IR lowering pass via :meth:`_compute_pinned_initialized_per_call`),
registers whose pinned local has not yet been written are
filtered out — their value is undefined garbage and saving it
is dead.
"""
low_word = self.target.low_word
normalised_clobbers = frozenset(low_word(register) for register in clobbers)
initialized_filter = self._current_call_pinned_initialized
# Dedup via ``set``: liveness-driven sharing maps several names
# to the same register, and emitting push/pop pairs once per
# name would unbalance the stack.
return sorted({
register
for register in self.pinned_register.values()
if low_word(register) in normalised_clobbers and low_word(register) != "ax"
if low_word(register) in normalised_clobbers
and low_word(register) != "ax"
and (initialized_filter is None or register in initialized_filter)
})

def _prologue_initialized_pinned_registers(self) -> set[str]:
    """Return the pinned registers that already hold a value at function entry.

    A register qualifies when the name pinned to it is a parameter:
    the function prologue loads pinned parameters into their pin, so
    the register carries a caller-supplied value from the first
    instruction onward.  A name counts as a parameter when it is a
    key of ``param_in_register`` or ``in_register_params``, or when
    it appears in the current function's parameter list
    (``_current_function_parameter_names``, treated as empty if it
    has not been set yet).

    Pinned LOCALS are excluded regardless of how they were pinned —
    auto-pinned or via ``__attribute__((pinned_register(R)))`` —
    because their first write is their initialisation.
    """
    # NOTE(review): ``in_register_params`` is membership-tested by
    # variable name here; confirm its keys are variable names rather
    # than function names.
    parameter_names = frozenset(getattr(self, "_current_function_parameter_names", ()))
    return {
        register
        for name, register in self.pinned_register.items()
        if name in self.param_in_register or name in self.in_register_params or name in parameter_names
    }

def _register_globals(self, declarations: list[Node], /) -> None:
"""Record file-scope declarations and validate their shapes.

Expand Down
Loading