From bbdf18351f9de1e588b8a98372c9b9159baf3241 Mon Sep 17 00:00:00 2001 From: iximeow Date: Thu, 12 Mar 2026 19:01:47 +0000 Subject: [PATCH] phd: collect core when killing non-booting guest this may or may not prove useful in practice; if we're lucky something got funky in device emulation and we can see a stuck thread. on the other hand, if we're unlucky the guest is stuck in a loop and all we see is one vCPU was running while everything else was idle. --- phd-tests/framework/src/test_vm/mod.rs | 10 +++++- phd-tests/framework/src/test_vm/server.rs | 39 ++++++++++++++++++++++- 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/phd-tests/framework/src/test_vm/mod.rs b/phd-tests/framework/src/test_vm/mod.rs index 98686861b..32a997c9e 100644 --- a/phd-tests/framework/src/test_vm/mod.rs +++ b/phd-tests/framework/src/test_vm/mod.rs @@ -887,7 +887,15 @@ impl TestVm { .instrument(info_span!("wait_to_boot")); match timeout(timeout_duration, boot).await { - Err(_) => anyhow::bail!("timed out while waiting to boot"), + Err(_) => { + error!( + "Guest did not boot after {}ms! Collecting core..", + timeout_duration.as_millis() + ); + let proc = self.server.as_ref().unwrap(); + proc.core(); + anyhow::bail!("timed out while waiting to boot") + } Ok(inner) => { inner.context("executing guest login sequence")?; } diff --git a/phd-tests/framework/src/test_vm/server.rs b/phd-tests/framework/src/test_vm/server.rs index be88a665b..737f6bad7 100644 --- a/phd-tests/framework/src/test_vm/server.rs +++ b/phd-tests/framework/src/test_vm/server.rs @@ -8,11 +8,12 @@ use std::{ fmt::Debug, net::{SocketAddr, SocketAddrV4}, os::unix::process::CommandExt, + time::SystemTime, }; use anyhow::Result; use camino::{Utf8Path, Utf8PathBuf}; -use tracing::{debug, info}; +use tracing::{debug, info, warn}; use crate::log_config::LogConfig; @@ -44,6 +45,7 @@ pub struct ServerProcessParameters<'a> { pub struct PropolisServer { server: Option, address: SocketAddrV4, + output_dir: Utf8PathBuf, } impl PropolisServer { @@ -117,6 +119,9 @@ impl PropolisServer { let server = PropolisServer { server: Some(server_cmd.spawn()?), address: server_addr, + // Stash the same output directory in case the framework has to + // write any files on behalf of the test run. + output_dir: output_dir.to_owned(), }; info!( @@ -130,6 +135,38 @@ impl PropolisServer { self.address } + /// Collect a core of this server process, placing it in the same output + /// directory as other artifacts of this test. + pub(super) fn core(&self) { + let Some(server_proc) = self.server.as_ref() else { + warn!("Tried to produce a core without a propolis-server?"); + return; + }; + + let core_name = format!( + "core-{}", + SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .expect("Time is gone, the song is over") + .as_millis() + ); + let core_path = self.output_dir.join(core_name); + + std::process::Command::new("pfexec") + .args([ + "gcore".as_ref(), + "-o".as_ref(), + core_path.as_os_str(), + server_proc.id().to_string().as_ref(), + ]) + .spawn() + .expect("can try to gcore a process") + .wait() + .expect("can gcore a propolis-server we spawned"); + + warn!("core written to {}", core_path); + } + /// Kills this server process if it hasn't been killed already. pub(super) fn kill(&mut self) { let Some(mut server) = self.server.take() else {