From 108243e899b7ff66255403f99432eaf225da0ed8 Mon Sep 17 00:00:00 2001
From: Ryan Breen <ryan@ryanbreen.com>
Date: Wed, 11 Mar 2026 05:31:32 -0400
Subject: [PATCH 1/2] =?UTF-8?q?perf:=20faster=20DNS/HTTP=20=E2=80=94=20Goo?=
 =?UTF-8?q?gle=20DNS=20first,=20500ms=20timeout,=20net=20timing?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- DNS: try Google (8.8.8.8) first, ahead of the hypervisor-specific
  servers (which remain as fallbacks). All platforms NAT/bridge to host
  networking, so Google is universally reachable. Avoids a 5s timeout
  per unreachable hypervisor DNS server.
- DNS: reduce per-server timeout from 5s to 500ms with millisecond
  precision (hypervisor DNS responds in <50ms, public DNS in <200ms).
- HTTP: resolve_multi() delegates to resolve_auto() — single DNS
  server order, no duplicate logic.
- Tracing: DNS logs each server attempt with elapsed time; HTTP logs
  DNS, TCP connect, and response receive phases with per-phase elapsed
  time plus a request total. All guarded by #[cfg(feature = "std")];
  the [http] lines are printed regardless of the `verbose` flag.
- ARM64: increase network RX poll rate from 20Hz to 100Hz (every 10th
  tick instead of every 50th) for 10ms max latency instead of 50ms.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../src/arch_impl/aarch64/timer_interrupt.rs  |  4 +-
 libs/libbreenix/src/dns.rs                    | 72 +++++++++++++++----
 libs/libbreenix/src/http.rs                   | 56 +++++++++------
 3 files changed, 96 insertions(+), 36 deletions(-)

diff --git a/kernel/src/arch_impl/aarch64/timer_interrupt.rs b/kernel/src/arch_impl/aarch64/timer_interrupt.rs
index abb337a4..314817b7 100644
--- a/kernel/src/arch_impl/aarch64/timer_interrupt.rs
+++ b/kernel/src/arch_impl/aarch64/timer_interrupt.rs
@@ -282,10 +282,10 @@ pub extern "C" fn timer_interrupt_handler() {
         crate::drivers::usb::xhci::poll_hid_events();
         // Poll network RX for incoming packets (PCI INTx routing not wired up)
         // Covers both VirtIO net PCI (Parallels) and e1000 (VMware)
-        // Throttle to every 50th tick (~20Hz at 1000Hz timer) to avoid overhead
+        // Poll every 10th tick (~100Hz at 1000Hz timer) for responsive networking
         if (crate::drivers::virtio::net_pci::is_initialized()
             || crate::drivers::e1000::is_initialized())
-            && _count % 50 == 0
+            && _count % 10 == 0
         {
             crate::task::softirqd::raise_softirq(crate::task::softirqd::SoftirqType::NetRx);
         }
diff --git a/libs/libbreenix/src/dns.rs b/libs/libbreenix/src/dns.rs
index c2a7bd2d..21f915a1 100644
--- a/libs/libbreenix/src/dns.rs
+++ b/libs/libbreenix/src/dns.rs
@@ -539,17 +539,17 @@ pub fn resolve(hostname: &str, dns_server: [u8; 4]) -> Result<DnsResult, DnsErro
         return Err(DnsError::SendError);
     }
 
-    // Receive response with 5-second timeout
+    // Receive response with 500ms timeout
     let mut resp_buf = [0u8; DNS_BUF_SIZE];
     let mut received = false;
     let mut resp_len = 0;
 
     // Network packets arrive via interrupt -> softirq -> process_rx().
     // We poll recvfrom() with yield_now() between attempts.
-    // DNS resolution via QEMU SLIRP forwards to host DNS, which can take time.
-    const TIMEOUT_SECS: u64 = 5;
+    // Hypervisor DNS resolvers respond in <50ms; public DNS in <200ms.
+    const TIMEOUT_MS: u64 = 500;
     let start = now_monotonic().unwrap_or(Timespec { tv_sec: 0, tv_nsec: 0 });
-    let deadline_secs = start.tv_sec as u64 + TIMEOUT_SECS;
+    let start_ms = start.tv_sec as u64 * 1000 + start.tv_nsec as u64 / 1_000_000;
 
     loop {
         match recvfrom(fd, &mut resp_buf, None) {
@@ -559,9 +559,10 @@ pub fn resolve(hostname: &str, dns_server: [u8; 4]) -> Result<DnsResult, DnsErro
                 break;
             }
             _ => {
-                // Check timeout
+                // Check timeout using millisecond precision
                 let now = now_monotonic().unwrap_or(Timespec { tv_sec: 0, tv_nsec: 0 });
-                if now.tv_sec as u64 >= deadline_secs {
+                let now_ms = now.tv_sec as u64 * 1000 + now.tv_nsec as u64 / 1_000_000;
+                if now_ms >= start_ms + TIMEOUT_MS {
                     break; // Timeout
                 }
                 // Yield to scheduler - allows timer interrupt to fire and process softirqs
@@ -605,18 +606,61 @@ pub fn resolve(hostname: &str, dns_server: [u8; 4]) -> Result<DnsResult, DnsErro
 
 /// Resolve a hostname by trying multiple DNS servers automatically.
 ///
-/// Tries Parallels (10.211.55.1), SLIRP (10.0.2.3), and Google (8.8.8.8)
-/// in order, returning the first successful result. This makes DNS resolution
-/// work across all supported platforms without caller configuration.
+/// Tries Google (8.8.8.8) first since it's reachable from all platforms
+/// (QEMU SLIRP, Parallels, VMware all NAT/bridge to host networking).
+/// Falls back to hypervisor-specific DNS servers if Google fails.
 pub fn resolve_auto(hostname: &str) -> Result<DnsResult, DnsError> {
-    let servers = [PARALLELS_DNS, VMWARE_DNS, SLIRP_DNS, GOOGLE_DNS];
+    let servers: [([u8; 4], &str); 4] = [ // (server address, label used only in log lines)
+        (GOOGLE_DNS, "8.8.8.8"),
+        (PARALLELS_DNS, "10.211.55.1"),
+        (VMWARE_DNS, "172.16.45.2"), // NOTE(review): labels presumably mirror the constants' values — verify
+        (SLIRP_DNS, "10.0.2.3"),
+    ];
+    #[cfg(feature = "std")]
+    let total_start = now_monotonic().unwrap_or(Timespec { tv_sec: 0, tv_nsec: 0 });
+
     let mut last_err = DnsError::Timeout;
-    for server in &servers {
+    for (server, _name) in &servers { // `_name` is read only by the std-gated logging below
+        #[cfg(feature = "std")]
+        let attempt_start = now_monotonic().unwrap_or(Timespec { tv_sec: 0, tv_nsec: 0 });
+
         match resolve(hostname, *server) {
-            Ok(r) if r.addr[0] != 0 && r.addr[0] != 127 => return Ok(r),
-            Ok(_) => continue,
-            Err(e) => { last_err = e; continue; }
+            Ok(r) if r.addr[0] != 0 && r.addr[0] != 127 => { // accept only answers whose first octet is neither 0 nor 127
+                #[cfg(feature = "std")]
+                {
+                    let elapsed = elapsed_ms(&attempt_start);
+                    let total = elapsed_ms(&total_start);
+                    eprintln!("[dns] resolved '{}' via {} -> {}.{}.{}.{} ({}ms, total {}ms)",
+                        hostname, _name, r.addr[0], r.addr[1], r.addr[2], r.addr[3],
+                        elapsed, total);
+                }
+                return Ok(r);
+            }
+            Ok(_) => {
+                #[cfg(feature = "std")]
+                eprintln!("[dns] '{}' via {}: unusable address ({}ms)",
+                    hostname, _name, elapsed_ms(&attempt_start));
+                continue; // last_err is left untouched: an unusable answer is not recorded as an error
+            }
+            Err(e) => {
+                #[cfg(feature = "std")]
+                eprintln!("[dns] '{}' via {}: {:?} ({}ms)",
+                    hostname, _name, e, elapsed_ms(&attempt_start));
+                last_err = e; // most recent error; returned if no server yields a usable address
+                continue;
+            }
         }
     }
+    #[cfg(feature = "std")]
+    eprintln!("[dns] '{}' FAILED all servers (total {}ms)", hostname, elapsed_ms(&total_start));
     Err(last_err)
 }
+
+/// Whole milliseconds elapsed between `start` and the current `now_monotonic()` reading.
+#[cfg(feature = "std")]
+fn elapsed_ms(start: &Timespec) -> u64 {
+    let now = now_monotonic().unwrap_or(Timespec { tv_sec: 0, tv_nsec: 0 }); // a failed clock read is treated as t=0
+    let start_ms = start.tv_sec as u64 * 1000 + start.tv_nsec as u64 / 1_000_000; // sub-millisecond part is truncated
+    let now_ms = now.tv_sec as u64 * 1000 + now.tv_nsec as u64 / 1_000_000;
+    now_ms.saturating_sub(start_ms) // yields 0 instead of underflowing when `now` is not after `start`
+}
diff --git a/libs/libbreenix/src/http.rs b/libs/libbreenix/src/http.rs
index 5d972279..e3ddae0d 100644
--- a/libs/libbreenix/src/http.rs
+++ b/libs/libbreenix/src/http.rs
@@ -16,11 +16,15 @@
 //! }
 //! ```
 
-use crate::dns::{resolve, DnsError, DnsResult, SLIRP_DNS, PARALLELS_DNS, GOOGLE_DNS};
+use crate::dns::{resolve_auto, DnsError, DnsResult};
 use crate::error::Error;
 use crate::socket::{connect_inet, recv, send, socket, AF_INET, SOCK_STREAM, SockAddrIn};
 use crate::syscall::{nr, raw};
+#[cfg(feature = "std")]
+use crate::time::now_monotonic;
 use crate::types::Fd;
+#[cfg(feature = "std")]
+use crate::types::Timespec;
 
 // ============================================================================
 // Constants
@@ -697,18 +701,20 @@ fn close_fd(fd: Fd) {
     }
 }
 
-/// Try multiple DNS servers for platform portability (Parallels, QEMU, external).
+/// Whole milliseconds elapsed since `start`; same computation as the private `elapsed_ms` in dns.rs.
+#[cfg(feature = "std")]
+fn http_elapsed_ms(start: &Timespec) -> u64 {
+    let now = now_monotonic().unwrap_or(Timespec { tv_sec: 0, tv_nsec: 0 }); // a failed clock read is treated as t=0
+    let start_ms = start.tv_sec as u64 * 1000 + start.tv_nsec as u64 / 1_000_000; // sub-millisecond part is truncated
+    let now_ms = now.tv_sec as u64 * 1000 + now.tv_nsec as u64 / 1_000_000;
+    now_ms.saturating_sub(start_ms) // yields 0 instead of underflowing when `now` is not after `start`
+}
+
+/// Resolve `hostname` via `dns::resolve_auto`, wrapping any `DnsError` in `HttpError::DnsError`.
 fn resolve_multi(hostname: &str) -> Result<DnsResult, HttpError> {
-    let servers = [PARALLELS_DNS, SLIRP_DNS, GOOGLE_DNS];
-    let mut last_err = DnsError::Timeout;
-    for server in &servers {
-        match resolve(hostname, *server) {
-            Ok(r) if r.addr[0] != 0 && r.addr[0] != 127 => return Ok(r),
-            Ok(_) => continue,
-            Err(e) => { last_err = e; continue; }
-        }
-    }
-    Err(HttpError::DnsError(last_err))
+    // Server order and fallback live in resolve_auto (Google first, then the
+    // hypervisor-specific servers); this wrapper only converts the error type.
+    resolve_auto(hostname).map_err(HttpError::DnsError)
 }
 
 // ============================================================================
@@ -739,35 +745,38 @@ pub fn http_request(
             parsed.host, parsed.port, parsed.path, parsed.is_tls);
     }
 
-    // Resolve hostname to IP — try multiple DNS servers for platform portability
+    // Resolve hostname to IP
+    #[cfg(feature = "std")]
+    let request_start = now_monotonic().unwrap_or(Timespec { tv_sec: 0, tv_nsec: 0 });
     #[cfg(feature = "std")]
     if verbose { eprint!("* Resolving {}...\n", parsed.host); }
     let dns_result = resolve_multi(parsed.host)?;
     let ip = dns_result.addr;
     #[cfg(feature = "std")]
-    if verbose {
-        eprint!("* Resolved to {}.{}.{}.{}\n", ip[0], ip[1], ip[2], ip[3]);
+    {
+        let dns_ms = http_elapsed_ms(&request_start);
+        eprint!("[http] DNS resolved {}.{}.{}.{} ({}ms)\n", ip[0], ip[1], ip[2], ip[3], dns_ms);
     }
 
     // Create TCP socket
     #[cfg(feature = "std")]
     if verbose { eprint!("* Creating TCP socket...\n"); }
     let fd = socket(AF_INET, SOCK_STREAM, 0).map_err(|_| HttpError::SocketError)?;
-    #[cfg(feature = "std")]
-    if verbose { eprint!("* Socket created (fd={})\n", fd.raw()); }
 
     // Connect to server
     #[cfg(feature = "std")]
+    let connect_start = now_monotonic().unwrap_or(Timespec { tv_sec: 0, tv_nsec: 0 });
+    #[cfg(feature = "std")]
     if verbose { eprint!("* Connecting to port {}...\n", parsed.port); }
     let server_addr = SockAddrIn::new(ip, parsed.port);
     if let Err(_e) = connect_inet(fd, &server_addr) {
         #[cfg(feature = "std")]
-        if verbose { eprint!("* Connect failed\n"); }
+        eprint!("[http] TCP connect FAILED ({}ms)\n", http_elapsed_ms(&connect_start));
         close_fd(fd);
         return Err(HttpError::ConnectError);
     }
     #[cfg(feature = "std")]
-    if verbose { eprint!("* Connected\n"); }
+    eprint!("[http] TCP connected ({}ms)\n", http_elapsed_ms(&connect_start));
 
     // Establish connection (plain or TLS)
     let mut conn = if parsed.is_tls {
@@ -832,6 +841,8 @@ pub fn http_request(
 
     // Receive response
     #[cfg(feature = "std")]
+    let recv_start = now_monotonic().unwrap_or(Timespec { tv_sec: 0, tv_nsec: 0 });
+    #[cfg(feature = "std")]
     if verbose { eprint!("* Waiting for response...\n"); }
     let mut total_received = 0usize;
     let max_read = response_buf.len();
@@ -906,7 +917,12 @@ pub fn http_request(
     }
 
     #[cfg(feature = "std")]
-    if verbose { eprint!("* Total received: {} bytes\n", total_received); }
+    {
+        let recv_ms = http_elapsed_ms(&recv_start);
+        let total_ms = http_elapsed_ms(&request_start);
+        eprint!("[http] response received: {} bytes (recv {}ms, total {}ms)\n",
+            total_received, recv_ms, total_ms);
+    }
 
     // Decode chunked transfer encoding if detected
     let decoded_body_len = if chunked {

From f0139348abf6d4ecb1a33f911f6b169edba68231 Mon Sep 17 00:00:00 2001
From: Ryan Breen <ryan@ryanbreen.com>
Date: Wed, 11 Mar 2026 05:42:04 -0400
Subject: [PATCH 2/2] feat: VMware SMP relocatable boot, PAN fix, btop/procfs
 improvements

- boot.S: Add SPAN bit to SCTLR_EL1 (don't auto-set PAN on exception
  entry) and explicitly clear PAN on both primary and secondary CPUs.
  VMware sets PAN=1 by default, causing permission faults on user-mapped
  page accesses from kernel code.
- boot.S: Compute relocation delta (x21) on secondary CPU entry for
  VMware where RAM starts at 0x80000000 vs link address 0x40080000.
  Apply delta to all symbol references from literal pools (SMP_UART_PHYS,
  SMP_STACK_BASE_PHYS, exception_vectors_boot, SMP_MAIR/TCR/TTBR).
- smp.rs: Add ram_base_offset() to SECONDARY_CPU_ENTRY_PHYS for correct
  physical entry point on VMware.
- boot.S: Secondary CPUs now initialize SCTLR_EL1 to match primary
  (LSMAOE, nTLSMD, SPAN, EOS) for PSCI direct-EL1 path.
- handlers.rs: Fix sys_fcntl to use single mutable borrow instead of
  drop-and-reacquire pattern, return EAGAIN instead of EBADF on lock
  contention.
- procfs: Add global_ticks to /proc/stat for accurate CPU% in btop.
- btop: Use global_ticks for CPU% delta (matches per-process tick
  scale), filter out Terminated processes from display.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 kernel/src/arch_impl/aarch64/boot.S | 43 +++++++++++++++++++++++++++++
 kernel/src/arch_impl/aarch64/smp.rs |  8 ++++--
 kernel/src/fs/procfs/mod.rs         |  2 ++
 kernel/src/syscall/handlers.rs      | 25 ++++-------------
 userspace/programs/src/btop.rs      | 23 +++++++++------
 5 files changed, 71 insertions(+), 30 deletions(-)

diff --git a/kernel/src/arch_impl/aarch64/boot.S b/kernel/src/arch_impl/aarch64/boot.S
index cdbe010a..16cc8a0e 100644
--- a/kernel/src/arch_impl/aarch64/boot.S
+++ b/kernel/src/arch_impl/aarch64/boot.S
@@ -101,6 +101,7 @@ drop_to_el1:
     mov x0, #0
     orr x0, x0, #(1 << 29)  // LSMAOE
     orr x0, x0, #(1 << 28)  // nTLSMD
+    orr x0, x0, #(1 << 23)  // SPAN (don't auto-set PAN on exception entry)
     orr x0, x0, #(1 << 11)  // EOS (exception return on stack)
     msr sctlr_el1, x0
 
@@ -124,6 +125,12 @@ el1_init:
     msr cpacr_el1, x0
     isb
 
+    // Clear PAN — architecturally UNKNOWN if we entered EL1 directly
+    // (the EL2 path sets PAN=0 via SPSR_EL2, but be safe)
+    // Encoding: MSR PAN, #0 = 0xD500409F
+    .inst 0xd500409f
+    isb
+
     // Set up boot stack pointer (low)
     ldr x0, =__boot_stack_top
     mov sp, x0
@@ -768,10 +775,20 @@ secondary_cpu_entry:
     mov x19, x0              // PSCI path: cpu_id from x0
 1:
 
+    // Compute relocation delta: on VMware, the kernel is loaded at a different
+    // physical address than the linker expects (RAM at 0x80000000 vs link at
+    // 0x40080000). All `ldr Xn, =symbol` loads get the linker address from the
+    // literal pool; we must add the delta to dereference them correctly.
+    // x21 = (actual PC of this label) - (linker address of this label)
+    adr x21, secondary_cpu_entry
+    ldr x22, =secondary_cpu_entry
+    sub x21, x21, x22          // x21 = relocation delta (0 on QEMU, 0x40000000 on VMware)
+
     // Debug breadcrumb: write cpu_id digit + '@' to UART (physical, pre-MMU)
     // Load UART physical address from SMP_UART_PHYS (set by CPU 0 Rust code)
     // Guard: skip if address is 0 (BSS default before CPU 0 writes it)
     ldr x2, =SMP_UART_PHYS
+    add x2, x2, x21            // relocate: linker addr -> physical addr
     ldr x2, [x2]             // x2 = UART phys addr
     cbz x2, 2f               // skip breadcrumbs if 0 (not yet initialized)
     add x3, x19, #'0'        // '1' for CPU 1, '2' for CPU 2, etc.
@@ -799,6 +816,7 @@ secondary_el1_init:
     // Load UART phys addr into callee-saved x20 for breadcrumbs.
     // x20=0 means UART not set; all breadcrumbs guard against this.
     ldr x20, =SMP_UART_PHYS
+    add x20, x20, x21        // relocate
     ldr x20, [x20]           // x20 = UART phys addr (preserved across init)
 
     // Breadcrumb 'A' = entered EL1 init
@@ -812,6 +830,24 @@ secondary_el1_init:
     msr cpacr_el1, x0
     isb
 
+    // Initialize SCTLR_EL1 for PSCI direct-EL1 path.
+    // Secondary CPUs inherit whatever the hypervisor set, which may differ
+    // from primary. Must match primary CPU's configuration.
+    mov x0, #0
+    orr x0, x0, #(1 << 29)  // LSMAOE
+    orr x0, x0, #(1 << 28)  // nTLSMD
+    orr x0, x0, #(1 << 23)  // SPAN (don't auto-set PAN on exception entry)
+    orr x0, x0, #(1 << 11)  // EOS
+    msr sctlr_el1, x0
+    isb
+
+    // Clear PAN (Privileged Access Never).
+    // PSTATE.PAN is architecturally UNKNOWN on reset — VMware sets it to 1,
+    // which causes permission faults when kernel code accesses user-mapped pages.
+    // Encoding: MSR PAN, #0 = 0xD500409F (op1=000, CRm=0000, op2=100)
+    .inst 0xd500409f
+    isb
+
     // Set up per-CPU boot stack (physical addresses, before MMU)
     // Stack top = SMP_STACK_BASE_PHYS + (cpu_id + 1) * 0x20_0000
     // SMP_STACK_BASE_PHYS is set by CPU 0 Rust code to (ram_base + 0x01000000).
@@ -823,6 +859,7 @@ secondary_el1_init:
     add x0, x0, #1           // cpu_id + 1
     lsl x0, x0, #21          // * 0x20_0000 (2MB)
     ldr x1, =SMP_STACK_BASE_PHYS
+    add x1, x1, x21          // relocate
     ldr x1, [x1]             // x1 = actual stack base (set by CPU 0)
     add x0, x0, x1
     mov sp, x0
@@ -838,6 +875,7 @@ secondary_el1_init:
 
     // Set VBAR_EL1 to boot exception vectors (low) for now
     ldr x0, =exception_vectors_boot
+    add x0, x0, x21          // relocate
     msr vbar_el1, x0
     isb
 
@@ -852,19 +890,23 @@ secondary_el1_init:
     // populates from its actual register values. This handles both
     // QEMU (boot.S page tables/config) and Parallels (loader page tables/config).
     ldr x0, =SMP_MAIR_PHYS
+    add x0, x0, x21          // relocate
     ldr x0, [x0]             // x0 = CPU 0's MAIR value
     msr mair_el1, x0
     ldr x0, =SMP_TCR_PHYS
+    add x0, x0, x21          // relocate
     ldr x0, [x0]             // x0 = CPU 0's TCR value
     msr tcr_el1, x0
     isb
 
     // Load TTBR0 from SMP_TTBR0_PHYS (set by CPU 0 Rust code)
     ldr x0, =SMP_TTBR0_PHYS
+    add x0, x0, x21          // relocate
     ldr x0, [x0]             // x0 = actual TTBR0 physical address
     msr ttbr0_el1, x0
     // Load TTBR1 from SMP_TTBR1_PHYS
     ldr x0, =SMP_TTBR1_PHYS
+    add x0, x0, x21          // relocate
     ldr x0, [x0]             // x0 = actual TTBR1 physical address
     msr ttbr1_el1, x0
     dsb ishst
@@ -941,6 +983,7 @@ secondary_drop_to_el1:
     mov x0, #0
     orr x0, x0, #(1 << 29)  // LSMAOE
     orr x0, x0, #(1 << 28)  // nTLSMD
+    orr x0, x0, #(1 << 23)  // SPAN (don't auto-set PAN on exception entry)
     orr x0, x0, #(1 << 11)  // EOS
     msr sctlr_el1, x0
 
diff --git a/kernel/src/arch_impl/aarch64/smp.rs b/kernel/src/arch_impl/aarch64/smp.rs
index 54a51d6b..dc1c97df 100644
--- a/kernel/src/arch_impl/aarch64/smp.rs
+++ b/kernel/src/arch_impl/aarch64/smp.rs
@@ -231,8 +231,12 @@ pub fn release_cpu(cpu_id: usize) -> i64 {
         return -2; // INVALID_PARAMS
     }
 
-    // Get the physical address of the secondary entry point in boot.S
-    let entry_phys = unsafe { core::ptr::read_volatile(&SECONDARY_CPU_ENTRY_PHYS) };
+    // Get the physical address of the secondary entry point in boot.S.
+    // SECONDARY_CPU_ENTRY_PHYS holds the linker address (base 0x40080000).
+    // On VMware, RAM starts at 0x80000000, so the actual physical address
+    // is offset by ram_base_offset (0x40000000).
+    let entry_phys = unsafe { core::ptr::read_volatile(&SECONDARY_CPU_ENTRY_PHYS) }
+        + crate::platform_config::ram_base_offset();
 
     // MPIDR: Aff0 = cpu_id, all other affinity fields = 0
     // This is the standard layout for ARM virt machines (QEMU, Parallels, VMware)
diff --git a/kernel/src/fs/procfs/mod.rs b/kernel/src/fs/procfs/mod.rs
index 63defe74..ded541f7 100644
--- a/kernel/src/fs/procfs/mod.rs
+++ b/kernel/src/fs/procfs/mod.rs
@@ -755,6 +755,7 @@ fn generate_stat() -> String {
          interrupts {}\n\
          context_switches {}\n\
          timer_ticks {}\n\
+         global_ticks {}\n\
          forks {}\n\
          execs {}\n\
          cow_faults {}\n\
@@ -765,6 +766,7 @@ fn generate_stat() -> String {
         IRQ_TOTAL.aggregate(),
         CTX_SWITCH_TOTAL.aggregate(),
         TIMER_TICK_TOTAL.aggregate(),
+        crate::time::get_ticks(),
         FORK_TOTAL.aggregate(),
         EXEC_TOTAL.aggregate(),
         COW_FAULT_TOTAL.aggregate(),
diff --git a/kernel/src/syscall/handlers.rs b/kernel/src/syscall/handlers.rs
index cfb05149..706b348e 100644
--- a/kernel/src/syscall/handlers.rs
+++ b/kernel/src/syscall/handlers.rs
@@ -3026,17 +3026,17 @@ pub fn sys_fcntl(fd: u64, cmd: u64, arg: u64) -> SyscallResult {
         }
     };
 
-    let manager_guard = match crate::process::try_manager() {
+    let mut manager_guard = match crate::process::try_manager() {
         Some(guard) => guard,
         None => {
             log::error!("sys_fcntl: Failed to get process manager");
-            return SyscallResult::Err(9); // EBADF
+            return SyscallResult::Err(11); // EAGAIN
         }
     };
 
-    let _process = match manager_guard
-        .as_ref()
-        .and_then(|m| m.find_process_by_thread(thread_id))
+    let process = match manager_guard
+        .as_mut()
+        .and_then(|m| m.find_process_by_thread_mut(thread_id))
         .map(|(_, p)| p)
     {
         Some(p) => p,
@@ -3046,21 +3046,6 @@ pub fn sys_fcntl(fd: u64, cmd: u64, arg: u64) -> SyscallResult {
         }
     };
 
-    // Need to reborrow mutably for fd_table operations
-    drop(manager_guard);
-    let mut manager_guard = match crate::process::try_manager() {
-        Some(guard) => guard,
-        None => return SyscallResult::Err(9),
-    };
-    let process = match manager_guard
-        .as_mut()
-        .and_then(|m| m.find_process_by_thread_mut(thread_id))
-        .map(|(_, p)| p)
-    {
-        Some(p) => p,
-        None => return SyscallResult::Err(9),
-    };
-
     match cmd {
         F_DUPFD => {
             match process.fd_table.dup_at_least(fd, arg, false) {
diff --git a/userspace/programs/src/btop.rs b/userspace/programs/src/btop.rs
index 6d837a10..ddeecc96 100644
--- a/userspace/programs/src/btop.rs
+++ b/userspace/programs/src/btop.rs
@@ -431,7 +431,7 @@ fn main() {
 
     // Previous tick counts for CPU% delta computation
     let mut prev_ticks: Vec<(u64, u64)> = Vec::new(); // (pid, ticks)
-    let mut prev_timer_ticks: u64 = 0;
+    let mut prev_global_ticks: u64 = 0;
     let mut prev_gpu_bytes: u64 = 0;
     let mut prev_gpu_full: u64 = 0;
     let mut prev_gpu_partial: u64 = 0;
@@ -471,7 +471,7 @@ fn main() {
         let syscalls = parse_value(stat, b"syscalls");
         let interrupts = parse_value(stat, b"interrupts");
         let ctx_switches = parse_value(stat, b"context_switches");
-        let timer_ticks = parse_value(stat, b"timer_ticks");
+        let global_ticks = parse_value(stat, b"global_ticks");
         let forks = parse_value(stat, b"forks");
         let execs = parse_value(stat, b"execs");
         let cow_faults = parse_value(stat, b"cow_faults");
@@ -504,8 +504,11 @@ fn main() {
             }
         }
 
-        // Compute CPU% deltas
-        let tick_delta = timer_ticks.saturating_sub(prev_timer_ticks);
+        // Compute CPU% deltas using global_ticks (same clock as per-process ticks).
+        // global_ticks is incremented only by CPU 0, matching the scale of per-process
+        // cpu_ticks_total which uses get_ticks() deltas. This gives htop-style
+        // percentages: 100% = one full CPU.
+        let tick_delta = global_ticks.saturating_sub(prev_global_ticks);
         let mut cpu_pcts: Vec<(u64, u64)> = Vec::new(); // (pid, pct*10 for 1 decimal)
         for proc in &procs {
             let prev = prev_ticks.iter().find(|(p, _)| *p == proc.pid);
@@ -524,7 +527,7 @@ fn main() {
         for proc in &procs {
             prev_ticks.push((proc.pid, proc.cpu_ticks));
         }
-        prev_timer_ticks = timer_ticks;
+        prev_global_ticks = global_ticks;
 
         // ── Render ───────────────────────────────────────────────────────
 
@@ -582,16 +585,20 @@ fn main() {
 
         for &idx in &sorted_indices {
             let proc = &procs[idx];
+
+            // Skip terminated/zombie processes (e.g. unreapable children)
+            let state_bytes = &proc.state[..proc.state_len];
+            if state_bytes.starts_with(b"Terminated") {
+                continue;
+            }
+
             let pct10 = cpu_pcts.iter().find(|(p, _)| *p == proc.pid).map(|(_, p)| *p).unwrap_or(0);
 
             // Color based on state
-            let state_bytes = &proc.state[..proc.state_len];
             if state_bytes == b"Running" {
                 emit_str("\x1b[32m"); // Green
             } else if state_bytes == b"Blocked" {
                 emit_str("\x1b[33m"); // Yellow
-            } else if state_bytes.starts_with(b"Terminated") {
-                emit_str("\x1b[31m"); // Red
             }
 
             // PID (right-aligned in 5 chars)