diff --git a/Cargo.toml b/Cargo.toml index 89b4cd1f..6eb2720d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -64,6 +64,8 @@ ecam_pcie = [] # Standard ECAM mechanism (default for most platforms) dwc_pcie = [] # DesignWare PCIe Core mechanism (CFG0/CFG1, used by RK3568) loongarch64_pcie = [] # LoongArch PCIe mechanism (used by LoongArch platforms) no_pcie_bar_realloc = [] +dwc_msi = ["dwc_pcie"] # DesignWare Native MSI (used when arch doesn't provide MSI, e.g., ARM without ITS) +pci_init_delay = ["dwc_pcie"] # Delay hvisor PCI init until guest DBI offset 0 access ############# aarch64 ############## # irqchip driver diff --git a/platform/aarch64/imx8mp/board.rs b/platform/aarch64/imx8mp/board.rs index c9b5ece0..5ba064f4 100644 --- a/platform/aarch64/imx8mp/board.rs +++ b/platform/aarch64/imx8mp/board.rs @@ -21,6 +21,9 @@ use crate::{ config::*, }; +use crate::pci::vpci_dev::VpciDevType; +use crate::pci_dev; + pub const BOARD_NAME: &str = "imx8mp"; pub const BOARD_NCPUS: usize = 4; @@ -107,14 +110,23 @@ pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 8] = [ // virtual_start: 0x30890000, // size: 0x1000, // }, // serial + // 0x32f00000 + // HvConfigMemoryRegion { + // mem_type: MEM_TYPE_IO, + // physical_start: 0x32f00000, + // virtual_start: 0x32f00000, + // size: 0x10000, + // }, // pcie-phy ]; pub const IRQ_WAKEUP_VIRTIO_DEVICE: usize = 32 + 0x20; pub const ROOT_ZONE_IRQS_BITMAP: &[BitmapWord] = &get_irqs_bitmap(&[ 35, 36, 37, 38, 45, 52, 55, 56, 57, 59, 64, 67, 75, 96, 97, 98, 99, 100, 101, 102, 103, 104, - 105, 135, 150, 151, 152, 162, + 105, 135, 150, 151, 152, 162, 172, 159, ]); +pub const ROOT_ZONE_IVC_CONFIG: [HvIvcConfig; 0] = []; + pub const ROOT_ARCH_ZONE_CONFIG: HvArchZoneConfig = HvArchZoneConfig { is_aarch32: 0, gic_config: GicConfig::Gicv3(Gicv3Config { @@ -127,4 +139,37 @@ pub const ROOT_ARCH_ZONE_CONFIG: HvArchZoneConfig = HvArchZoneConfig { }), }; -pub const ROOT_ZONE_IVC_CONFIG: [HvIvcConfig; 0] = []; +pub const ROOT_PCI_CONFIG: &[HvPciConfig] = &[HvPciConfig { + ecam_base: 0x33800000, + ecam_size: 0x400000, + io_base: 0x1ff80000, + io_size: 0x10000, + pci_io_base: 0x0, + mem32_base: 0x1800_0000, + mem32_size: 0x7f0_0000, + pci_mem32_base: 0x1800_0000, + mem64_base: 0x0, + mem64_size: 0x0, + pci_mem64_base: 0x0, + bus_range_begin: 0x0, + bus_range_end: 0x1f, + domain: 0x0, +}]; + +pub const ROOT_DWC_ATU_CONFIG: &[HvDwcAtuConfig] = &[HvDwcAtuConfig { + ecam_base: 0x33800000, + dbi_base: 0x33800000, + dbi_size: 0x400000, + apb_base: 0x0, + apb_size: 0x0, + cfg_base: 0x1ff00000, + cfg_size: 0x80000, + io_cfg_atu_shared: 1, + io_atu_index: 1, + dw_msi_irq: 172, +}]; + +pub const ROOT_PCI_DEVS: [HvPciDevConfig; 2] = [ + pci_dev!(0x0, 0x00, 0x0, 0x0, VpciDevType::Physical), + pci_dev!(0x0, 0x01, 0x0, 0x0, VpciDevType::Physical), +]; diff --git a/platform/aarch64/imx8mp/cargo/features b/platform/aarch64/imx8mp/cargo/features index 7acf44c6..15ad917c 100644 --- a/platform/aarch64/imx8mp/cargo/features +++ b/platform/aarch64/imx8mp/cargo/features @@ -1,2 +1,7 @@ gicv3 imx_uart +pci +dwc_pcie +no_pcie_bar_realloc +dwc_msi +pci_init_delay \ No newline at end of file diff --git a/platform/aarch64/imx8mp/configs/zone1-linux.json b/platform/aarch64/imx8mp/configs/zone1-linux.json index dd7d732a..72d76eea 100644 --- a/platform/aarch64/imx8mp/configs/zone1-linux.json +++ b/platform/aarch64/imx8mp/configs/zone1-linux.json @@ -2,80 +2,148 @@ "arch": "arm64", "name": "linux2", "zone_id": 1, - "cpus": [2, 3], + "cpus": [ + 2, + 3 + ], "memory_regions": [ { "type": "ram", "physical_start": "0x50000000", - "virtual_start": "0x50000000", + "virtual_start": "0x50000000", "size": "0x30000000" }, { "type": "io", "physical_start": "0x30a60000", - "virtual_start": "0x30a60000", + "virtual_start": "0x30a60000", "size": "0x10000" }, { "type": "virtio", "physical_start": "0xa003c00", - "virtual_start": "0xa003c00", + "virtual_start": "0xa003c00", "size": "0x200" }, { "type": "virtio", "physical_start": "0xa003800", - "virtual_start": "0xa003800", + "virtual_start": "0xa003800", "size": "0x200" }, { "type": "virtio", "physical_start": "0xa003600", - "virtual_start": "0xa003600", + "virtual_start": "0xa003600", "size": "0x200" }, { "type": "io", "physical_start": "0x38000000", - "virtual_start": "0x38000000", + "virtual_start": "0x38000000", "size": "0x10000" }, { "type": "io", "physical_start": "0x38500000", - "virtual_start": "0x38500000", + "virtual_start": "0x38500000", "size": "0x20000" }, { "type": "io", "physical_start": "0x32e80000", - "virtual_start": "0x32e80000", + "virtual_start": "0x32e80000", "size": "0x20000" }, { "type": "io", "physical_start": "0x30c00000", - "virtual_start": "0x30c00000", + "virtual_start": "0x30c00000", "size": "0x400000" }, { "type": "io", "physical_start": "0x32fc0000", - "virtual_start": "0x32fc0000", + "virtual_start": "0x32fc0000", "size": "0x20000" } ], - "interrupts": [35, 37, 38, 45, 56, 57, 61, 75, 76, 78, 135, 162], + "interrupts": [ + 35, + 37, + 38, + 45, + 56, + 57, + 61, + 75, + 76, + 78, + 135, + 162, + 172 + ], "ivc_configs": [], "kernel_filepath": "./Image", "dtb_filepath": "./linux2.dtb", "kernel_load_paddr": "0x50400000", - "dtb_load_paddr": "0x50000000", - "entry_point": "0x50400000", + "dtb_load_paddr": "0x50000000", + "entry_point": "0x50400000", "arch_config": { + "gic_version": "v3", "gicd_base": "0x38800000", "gicd_size": "0x10000", "gicr_base": "0x38880000", - "gicr_size": "0xc0000" - } + "gicr_size": "0xc0000", + "is_aarch32": false + }, + "pci_config": [ + { + "ecam_base": "0x33800000", + "ecam_size": "0x400000", + "io_base": "0x1ff80000", + "io_size": "0x10000", + "pci_io_base": "0x0", + "mem32_base": "0x18000000", + "mem32_size": "0x7f00000", + "pci_mem32_base": "0x18000000", + "mem64_base": "0x0", + "mem64_size": "0x0", + "pci_mem64_base": "0x0", + "bus_range_begin": "0x0", + "bus_range_end": "0x1f", + "domain": "0x0" + } + ], + "dwc_atu_config": [ + { + "ecam_base": "0x33800000", + "dbi_base": "0x33800000", + "dbi_size": "0x400000", + "apb_base": "0x0", + "apb_size": "0x0", + "cfg_base": "0x1ff00000", + "cfg_size": "0x10000", + "io_cfg_atu_shared": 1, + "io_atu_index": 1, + "dw_msi_irq": 172 + } + ], + "num_pci_devs": 2, + "alloc_pci_devs": [ + { + "domain": "0x0", + "bus": "0x0", + "device": "0x0", + "function": "0x0", + "dev_type": "0" + }, + { + "domain": "0x0", + "bus": "0x1", + "device": "0x0", + "function": "0x1", + "dev_type": "0" + } + ] } diff --git a/platform/aarch64/rk3568/board.rs b/platform/aarch64/rk3568/board.rs index 32ed5d3c..185bd9d9 100644 --- a/platform/aarch64/rk3568/board.rs +++ b/platform/aarch64/rk3568/board.rs @@ -348,6 +348,8 @@ pub const ROOT_DWC_ATU_CONFIG: &[HvDwcAtuConfig] = &[HvDwcAtuConfig { cfg_base: 0xf2000000, cfg_size: 0x80000 * 2, io_cfg_atu_shared: 0, + io_atu_index: 0, + dw_msi_irq: 0, }]; pub const ROOT_PCI_DEVS: [HvPciDevConfig; 2] = [ diff --git a/src/config.rs b/src/config.rs index aecc2759..a8edd516 100644 --- a/src/config.rs +++ b/src/config.rs @@ -257,6 +257,10 @@ pub struct HvDwcAtuConfig { // set 1 if io base use atu0, when hvisor need set mmio for io // normally, when num-viewport less than 4, io_cfg_atu_shared is 1, otherwise is 0 pub io_cfg_atu_shared: u64, + // choose the atu index for io and cfg access, when io_cfg_atu_shared is 1, io and cfg use the same atu index, otherwise use different atu index + pub io_atu_index: u64, + // Shared hardware interrupt ID for this DWC RC MSI block + pub dw_msi_irq: u64, } impl HvDwcAtuConfig { @@ -273,6 +277,8 @@ impl HvDwcAtuConfig { cfg_base: 0, cfg_size: 0, io_cfg_atu_shared: 0, + io_atu_index: 0, + dw_msi_irq: 0, } } } diff --git a/src/consts.rs b/src/consts.rs index 62f93785..e65e98e8 100644 --- a/src/consts.rs +++ b/src/consts.rs @@ -88,6 +88,7 @@ pub fn hv_end() -> VirtAddr { pub const IPI_EVENT_CLEAR_INJECT_IRQ: usize = 4; pub const IPI_EVENT_UPDATE_HART_LINE: usize = 5; pub const IPI_EVENT_SEND_IPI: usize = 6; +pub const IPI_EVENT_DWC_MSI_INJECT: usize = 7; extern "C" { /// Entry point of the hypervisor written in assembly. diff --git a/src/device/irqchip/gicv3/mod.rs b/src/device/irqchip/gicv3/mod.rs index a8d74c50..28f1d7b5 100644 --- a/src/device/irqchip/gicv3/mod.rs +++ b/src/device/irqchip/gicv3/mod.rs @@ -44,6 +44,7 @@ use crate::hypercall::SGI_IPI_ID; use crate::zone::Zone; const ICH_HCR_UIE: u64 = 1 << 1; + //TODO: add Distributor init pub fn gicc_init() { //TODO: add Redistributor init @@ -109,6 +110,9 @@ pub fn gicv3_handle_irq_el1() { warn!("skip sgi {}", irq_id); deactivate_irq(irq_id); } else { + #[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] + let mut is_dwc_msi_irq = false; + if irq_id == 27 { // virtual timer interrupt TIMER_INTERRUPT_COUNTER.fetch_add(1, core::sync::atomic::Ordering::SeqCst); @@ -127,12 +131,30 @@ pub fn gicv3_handle_irq_el1() { } else if irq_id > 31 { //inject phy irq trace!("*** get spi_irq id = {}", irq_id); + + #[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] + { + if let Some(domain_id) = + crate::pci::dwc_msi::get_domain_id_by_irq(irq_id as u32) + { + is_dwc_msi_irq = true; + crate::pci::dwc_msi::dwc_msi_transfer_and_inject(domain_id, irq_id); + } + } } else { warn!("not konw irq id = {}", irq_id); } + + #[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] + if irq_id != 25 && !is_dwc_msi_irq { + inject_irq(irq_id, true); + } + + #[cfg(not(all(feature = "dwc_pcie", feature = "dwc_msi")))] if irq_id != 25 { inject_irq(irq_id, true); } + deactivate_irq(irq_id); } } diff --git a/src/device/irqchip/gicv3/vgic.rs b/src/device/irqchip/gicv3/vgic.rs index 8caab289..4cf4926c 100644 --- a/src/device/irqchip/gicv3/vgic.rs +++ b/src/device/irqchip/gicv3/vgic.rs @@ -309,7 +309,19 @@ pub fn vgicv3_dist_handler(mmio: &mut MMIOAccess, _arg: usize) -> HvResult { match reg { reg if reg_range(GICD_IROUTER, 1024, 8).contains(®) => { - vgicv3_handle_irq_ops(mmio, (reg - GICD_IROUTER) as u32 / 8) + let irq = (reg - GICD_IROUTER) as u32 / 8; + + #[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] + { + // For zone0, the domainmsiinfo is empty, but it will always register the intterrupt to cpu0 + // So this remap operation is needed for other zones + if mmio.is_write && crate::pci::dwc_msi::is_dwc_msi_hwirq(irq) { + info!("remap dwc msi hwirq {} to cpu0!", irq); + mmio.value = 0; + } + } + + vgicv3_handle_irq_ops(mmio, irq) } reg if reg_range(GICD_ITARGETSR, 1024, 1).contains(®) => { vgicv3_handle_irq_ops(mmio, (reg - GICD_ITARGETSR) as u32) diff --git a/src/event.rs b/src/event.rs index f31b1159..83b997fd 100644 --- a/src/event.rs +++ b/src/event.rs @@ -17,7 +17,8 @@ use crate::{ arch::ipi::{arch_check_events, arch_prepare_send_event, arch_send_event}, consts::{ - IPI_EVENT_CLEAR_INJECT_IRQ, IPI_EVENT_SEND_IPI, IPI_EVENT_UPDATE_HART_LINE, MAX_CPU_NUM, + IPI_EVENT_CLEAR_INJECT_IRQ, IPI_EVENT_DWC_MSI_INJECT, IPI_EVENT_SEND_IPI, + IPI_EVENT_UPDATE_HART_LINE, MAX_CPU_NUM, }, cpu_data::this_cpu_data, device::{irqchip::inject_irq, virtio_trampoline::handle_virtio_irq}, @@ -103,6 +104,18 @@ pub fn check_events() -> bool { inject_irq(IRQ_WAKEUP_VIRTIO_DEVICE, false); true } + Some(IPI_EVENT_DWC_MSI_INJECT) => { + #[cfg(all( + target_arch = "aarch64", + feature = "gicv3", + feature = "dwc_pcie", + feature = "dwc_msi" + ))] + { + crate::pci::dwc_msi::handle_dwc_msi_inject_event(); + } + true + } Some(IPI_EVENT_CLEAR_INJECT_IRQ) | Some(IPI_EVENT_UPDATE_HART_LINE) | Some(IPI_EVENT_SEND_IPI) => { diff --git a/src/main.rs b/src/main.rs index 736d209b..639f8783 100644 --- a/src/main.rs +++ b/src/main.rs @@ -73,7 +73,7 @@ use arch::{cpu::cpu_start, entry::arch_entry}; use config::root_zone_config; use core::sync::atomic::{AtomicI32, AtomicU32, Ordering}; use cpu_data::PerCpu; -#[cfg(feature = "pci")] +#[cfg(all(feature = "pci", not(feature = "pci_init_delay")))] use pci::pci_config::hvisor_pci_init; static INITED_CPUS: AtomicU32 = AtomicU32::new(0); @@ -137,7 +137,7 @@ fn primary_init_early() { let root_config = root_zone_config(); - #[cfg(feature = "pci")] + #[cfg(all(feature = "pci", not(feature = "pci_init_delay")))] if root_config.num_pci_bus > 0 { let num_pci_bus = root_config.num_pci_bus as usize; let _ = hvisor_pci_init(&root_config.pci_config[..num_pci_bus]); diff --git a/src/pci/dwc_msi.rs b/src/pci/dwc_msi.rs new file mode 100644 index 00000000..fbdc338c --- /dev/null +++ b/src/pci/dwc_msi.rs @@ -0,0 +1,342 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// + +use alloc::collections::btree_map::BTreeMap; +#[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] +use alloc::collections::VecDeque; +use alloc::vec::Vec; +use spin::{Lazy, Mutex}; + +#[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] +use crate::cpu_data::this_cpu_data; +use crate::error::HvResult; +#[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] +use crate::event::send_event; +use crate::memory::Frame; +#[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] +use crate::{ + consts::{IPI_EVENT_DWC_MSI_INJECT, MAX_CPU_NUM}, + device::irqchip::inject_irq, + hypercall::SGI_IPI_ID, +}; + +// DBI MSI register offsets +pub const PCIE_MSI_ADDR_LO: usize = 0x820; +pub const PCIE_MSI_ADDR_HI: usize = 0x824; +pub const PCIE_MSI_INTR0_ENABLE: usize = 0x828; +pub const PCIE_MSI_INTR0_MASK: usize = 0x82c; +pub const PCIE_MSI_INTR0_STATUS: usize = 0x830; + +/// DesignWare native MSI domain configuration +/// Each PCIe RC (domain) has a set of 32 MSI vectors that can be distributed +/// across multiple VMs. This structure manages the allocation of these vectors +/// and provides storage for the MSI doorbell address. +#[derive(Debug, Copy, Clone)] +pub struct DwMsiBitOwner { + /// Target vCPU used for injection when forwarding this MSI + pub target_cpu: usize, + /// First hardware MSI bit in this allocation + pub start_hwirq_bit: u32, + /// Number of contiguous vectors in this allocation + pub num_vectors: u32, +} + +impl DwMsiBitOwner { + #[inline] + pub fn contains_hwbit(&self, hwbit: u32) -> bool { + hwbit >= self.start_hwirq_bit && hwbit < self.start_hwirq_bit + self.num_vectors + } +} + +#[derive(Debug)] +pub struct DwMsiDomain { + /// Next MSI vector index to allocate (0-31) + /// When a VM allocates N vectors, it gets indices [next_alloc, next_alloc+N) + pub next_alloc: u32, + + /// Physical page frame for the doorbell address + /// Each domain has one unique doorbell that is written by hvisor + pub doorbell_frame: Frame, + + /// Shared physical interrupt line used by this DWC RC MSI block + pub irq: u32, + + /// Per-zone allocation records for fast hwbit -> zone/cpu lookup + pub bit_owners: Vec, +} + +impl DwMsiDomain { + /// Create a new DW MSI domain for a PCIe RC + pub fn new(irq: u32) -> HvResult { + let doorbell_frame = Frame::new_zero()?; + + Ok(Self { + next_alloc: 0, + doorbell_frame, + irq, + bit_owners: Vec::new(), + }) + } + + /// Allocate a contiguous range of MSI vectors for a VM + /// Returns the starting vector index if successful, or error if not enough vectors available + pub fn allocate(&mut self, num_vectors: u32) -> HvResult { + // Check if there are enough vectors left (32 total vectors per domain) + if self.next_alloc + num_vectors > 32 { + return hv_result_err!(EINVAL, "Not enough MSI vectors available in domain"); + } + + let alloc_offset = self.next_alloc; + self.next_alloc += num_vectors; + + Ok(alloc_offset) + } + + /// Allocate MSI vectors for a zone and record the ownership mapping. + pub fn allocate_for_cpu(&mut self, target_cpu: usize, num_vectors: u32) -> HvResult { + let start_hwirq_bit = self.allocate(num_vectors)?; + self.bit_owners.push(DwMsiBitOwner { + target_cpu, + start_hwirq_bit, + num_vectors, + }); + info!( + "DW MSI owner added: cpu {}, vectors {}, range [{}..{}), total_owner_records {}", + target_cpu, + num_vectors, + start_hwirq_bit, + start_hwirq_bit + num_vectors, + self.bit_owners.len() + ); + Ok(start_hwirq_bit) + } + + /// Find the owner record by hardware MSI bit. + pub fn find_owner_by_hwbit(&self, hwbit: u32) -> Option { + self.bit_owners + .iter() + .copied() + .find(|owner| owner.contains_hwbit(hwbit)) + } + + /// Get the physical address of the doorbell for this domain + pub fn doorbell_paddr(&self) -> u64 { + self.doorbell_frame.start_paddr() as u64 + } +} + +/// Global storage for DW MSI domain configurations +/// Indexed by domain ID +pub static DW_MSI_DOMAINS: Lazy>> = + Lazy::new(|| Mutex::new(BTreeMap::new())); + +#[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] +static DWC_MSI_IPI_PENDING_IRQS: Lazy>>> = Lazy::new(|| { + let mut queues = Vec::with_capacity(MAX_CPU_NUM); + for _ in 0..MAX_CPU_NUM { + queues.push(VecDeque::new()); + } + Mutex::new(queues) +}); + +/// Initialize DW MSI domain for a given domain ID +pub fn init_dwc_msi_domain(domain_id: u8, irq: u32) -> HvResult<()> { + let domain = DwMsiDomain::new(irq)?; + let doorbell_paddr = domain.doorbell_paddr(); + DW_MSI_DOMAINS.lock().insert(domain_id, domain); + info!( + "Initialized DW MSI domain {} with doorbell at {:#x}, irq {}", + domain_id, doorbell_paddr, irq + ); + Ok(()) +} + +/// Get mutable reference to a DW MSI domain +pub fn get_dwc_msi_domain_mut( + domain_id: u8, +) -> Option>> { + let domains = DW_MSI_DOMAINS.lock(); + if domains.contains_key(&domain_id) { + drop(domains); + Some(DW_MSI_DOMAINS.lock()) + } else { + None + } +} + +/// Get the doorbell physical address for a specific domain +/// Returns 0 if domain not found +pub fn get_domain_doorbell_paddr(domain_id: u8) -> u64 { + let domains = DW_MSI_DOMAINS.lock(); + domains + .get(&domain_id) + .map(|domain| domain.doorbell_paddr()) + .unwrap_or(0) +} + +/// Get the shared hardware MSI IRQ for a specific domain +/// Returns 0 if domain not found +pub fn get_domain_msi_irq(domain_id: u8) -> u32 { + let domains = DW_MSI_DOMAINS.lock(); + domains + .get(&domain_id) + .map(|domain| domain.irq) + .unwrap_or(0) +} + +/// Check whether an IRQ is used as a shared DWC MSI hardware interrupt +pub fn is_dwc_msi_irq(irq: u32) -> bool { + let domains = DW_MSI_DOMAINS.lock(); + domains.values().any(|domain| domain.irq == irq) +} + +/// Check whether an IRQ matches any configured DWC MSI hardware interrupt. +pub fn is_dwc_msi_hwirq(irq: u32) -> bool { + is_dwc_msi_irq(irq) +} + +/// Return all domain IDs that share the specified DWC MSI hardware IRQ. +pub fn get_domains_by_irq(irq: u32) -> Vec { + let domains = DW_MSI_DOMAINS.lock(); + domains + .iter() + .filter_map(|(domain_id, domain)| { + if domain.irq == irq { + Some(*domain_id) + } else { + None + } + }) + .collect() +} + +/// Find bit ownership for a specific domain and hardware MSI bit. +pub fn get_domain_owner_by_hwirq_bit(domain_id: u8, hwbit: u32) -> Option { + let domains = DW_MSI_DOMAINS.lock(); + domains + .get(&domain_id) + .and_then(|domain| domain.find_owner_by_hwbit(hwbit)) +} + +#[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] +fn get_domain_dbi_base(domain_id: u8) -> Option { + let ecam_base = crate::platform::ROOT_PCI_CONFIG + .iter() + .find(|cfg| cfg.domain == domain_id) + .map(|cfg| cfg.ecam_base)?; + + crate::platform::ROOT_DWC_ATU_CONFIG + .iter() + .find(|cfg| cfg.ecam_base == ecam_base) + .map(|cfg| cfg.dbi_base as usize) +} + +#[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] +fn first_set_bit(mask: u32) -> Option { + if mask == 0 { + None + } else { + Some(mask.trailing_zeros()) + } +} + +#[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] +fn find_target_cpu(domain_id: u8, irq_bit: usize) -> Option { + get_domain_owner_by_hwirq_bit(domain_id, irq_bit as u32).map(|owner| owner.target_cpu) +} + +#[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] +fn dwc_msi_pending_irq_bit(domain_id: u8) -> Option { + let dbi_base = get_domain_dbi_base(domain_id)?; + let status = + unsafe { core::ptr::read_volatile((dbi_base + PCIE_MSI_INTR0_STATUS) as *const u32) }; + first_set_bit(status) +} + +#[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] +pub fn get_domain_id_by_irq(irq: u32) -> Option { + let domains = DW_MSI_DOMAINS.lock(); + domains.iter().find_map(|(domain_id, domain)| { + if domain.irq == irq { + Some(*domain_id) + } else { + None + } + }) +} + +#[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] +fn enqueue_dwc_msi_ipi_irq(target_cpu: usize, irq_id: usize) -> bool { + if target_cpu >= MAX_CPU_NUM { + error!( + "DWC MSI enqueue failed: invalid target cpu {}, irq {}", + target_cpu, irq_id + ); + return false; + } + + let mut queues = DWC_MSI_IPI_PENDING_IRQS.lock(); + if let Some(queue) = queues.get_mut(target_cpu) { + queue.push_back(irq_id); + true + } else { + false + } +} + +#[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] +fn pop_dwc_msi_ipi_irq(cpu_id: usize) -> Option { + if cpu_id >= MAX_CPU_NUM { + return None; + } + + let mut queues = DWC_MSI_IPI_PENDING_IRQS.lock(); + queues.get_mut(cpu_id).and_then(|queue| queue.pop_front()) +} + +#[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] +pub fn dwc_msi_transfer_and_inject(domain_id: u8, irq_id: usize) { + if let Some(irq_bit) = dwc_msi_pending_irq_bit(domain_id) { + if let Some(target_cpu) = find_target_cpu(domain_id, irq_bit as usize) { + if target_cpu == 0 { + inject_irq(irq_id, true); + } else { + if enqueue_dwc_msi_ipi_irq(target_cpu, irq_id) { + send_event(target_cpu, SGI_IPI_ID as usize, IPI_EVENT_DWC_MSI_INJECT); + } else { + error!( + "Failed to enqueue DWC MSI irq {} for target cpu {}", + irq_id, target_cpu + ); + } + } + } else { + error!("No target cpu found for DWC msi irq bit {}!", irq_bit); + } + } else { + error!("No pending DWC msi irq found!"); + } +} + +#[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] +pub fn handle_dwc_msi_inject_event() { + let cpu_id = this_cpu_data().id; + if let Some(irq_id) = pop_dwc_msi_ipi_irq(cpu_id) { + inject_irq(irq_id, true); + } else { + warn!("No pending DWC MSI IPI irq for cpu {}", cpu_id); + } +} diff --git a/src/pci/mod.rs b/src/pci/mod.rs index 7217cce5..a6d8e799 100644 --- a/src/pci/mod.rs +++ b/src/pci/mod.rs @@ -23,6 +23,9 @@ pub mod pci_handler; pub mod pci_struct; pub mod vpci_dev; +#[cfg(all(feature = "dwc_msi", feature = "dwc_pcie"))] +pub mod dwc_msi; + #[cfg(test)] pub mod pci_test; diff --git a/src/pci/pci_access.rs b/src/pci/pci_access.rs index dc891bd9..d56b4149 100644 --- a/src/pci/pci_access.rs +++ b/src/pci/pci_access.rs @@ -987,7 +987,7 @@ pub enum BridgeField { LatencyTime, HeaderType, Bist, - Bar, + Bar(usize), PrimaryBusNumber, SecondaryBusNumber, SubordinateBusNumber, @@ -1023,7 +1023,7 @@ impl Debug for BridgeField { BridgeField::LatencyTime => write!(f, "LatencyTime"), BridgeField::HeaderType => write!(f, "HeaderType"), BridgeField::Bist => write!(f, "Bist"), - BridgeField::Bar => write!(f, "Bar"), + BridgeField::Bar(slot) => write!(f, "Bar({})", slot), BridgeField::PrimaryBusNumber => write!(f, "PrimaryBusNumber"), BridgeField::SecondaryBusNumber => write!(f, "SecondaryBusNumber"), BridgeField::SubordinateBusNumber => write!(f, "SubordinateBusNumber"), @@ -1061,7 +1061,7 @@ impl PciField for BridgeField { BridgeField::LatencyTime => 0x0d, BridgeField::HeaderType => 0x0e, BridgeField::Bist => 0x0f, - BridgeField::Bar => 0x10, + BridgeField::Bar(slot) => 0x10 + slot * 4, BridgeField::PrimaryBusNumber => 0x18, BridgeField::SecondaryBusNumber => 0x19, BridgeField::SubordinateBusNumber => 0x1a, @@ -1096,7 +1096,7 @@ impl PciField for BridgeField { BridgeField::LatencyTime => 1, BridgeField::HeaderType => 1, BridgeField::Bist => 1, - BridgeField::Bar => 4, + BridgeField::Bar(_) => 4, BridgeField::PrimaryBusNumber => 1, BridgeField::SecondaryBusNumber => 1, BridgeField::SubordinateBusNumber => 1, @@ -1133,7 +1133,8 @@ impl BridgeField { (0x0d, 1) => BridgeField::LatencyTime, (0x0e, 1) => BridgeField::HeaderType, (0x0f, 1) => BridgeField::Bist, - (0x10, 4) | (0x14, 4) => BridgeField::Bar, + (0x10, 4) => BridgeField::Bar(0), + (0x14, 4) => BridgeField::Bar(1), (0x18, 1) => BridgeField::PrimaryBusNumber, (0x19, 1) => BridgeField::SecondaryBusNumber, (0x1a, 1) => BridgeField::SubordinateBusNumber, diff --git a/src/pci/pci_config.rs b/src/pci/pci_config.rs index 56f6b7f9..61809321 100644 --- a/src/pci/pci_config.rs +++ b/src/pci/pci_config.rs @@ -77,6 +77,11 @@ pub static GLOBAL_PCIE_LIST: Lazy HvResult { warn!("begin {:#x?}", pci_config); + + // Track domains that have been initialized for DW MSI + #[cfg(all(feature = "dwc_msi", feature = "dwc_pcie"))] + let mut initialized_domains: alloc::vec::Vec = alloc::vec::Vec::new(); + #[cfg(any( feature = "ecam_pcie", feature = "dwc_pcie", @@ -167,6 +172,20 @@ pub fn hvisor_pci_init(pci_config: &[HvPciConfig]) -> HvResult { .lock() .insert(node.get_bdf(), ArcRwLockVirtualPciConfigSpace::new(node)); } + + // Initialize DW MSI domain for this domain ID (only once per domain) + #[cfg(all(feature = "dwc_msi", feature = "dwc_pcie"))] + { + if !initialized_domains.contains(&domain) { + let msi_irq = platform::ROOT_DWC_ATU_CONFIG + .iter() + .find(|cfg| cfg.ecam_base == rootcomplex_config.ecam_base) + .map(|cfg| cfg.dw_msi_irq as u32) + .unwrap_or(0); + crate::pci::dwc_msi::init_dwc_msi_domain(domain, msi_irq)?; + initialized_domains.push(domain); + } + } } info!("hvisor pci init done \n{:#?}", GLOBAL_PCIE_LIST); Ok(()) @@ -257,6 +276,7 @@ impl Zone { let mut vbus_pre = bus_range_begin; let mut bus_pre = bus_range_begin; let mut device_pre = 0u8; + let mut domain_msi_count: u32 = 0; let mut vdevice_pre = 0u8; /* @@ -275,48 +295,49 @@ impl Zone { */ for dev_config in &filtered_devices { let bdf = Bdf::new_from_config(*dev_config); - // let bus = bdf.bus(); - // let device = bdf.device(); - // let function = bdf.function(); - - // /* - // * vfunction = if (bus != bus_pre || device != device_pre) && function != 0 - // * In practice, remapping is performed only for new devices whose function is not 0; - // * however, the check for function != 0 does not affect the final result. - // */ - // let vfunction = if bus != bus_pre || device != device_pre { - // 0 - // } else { - // function - // }; - - // let vbus = if bus > bus_pre { - // vbus_pre += 1; - // vbus_pre - // } else { - // vbus_pre - // }; - - // // Remap device number to be contiguous, starting from 0 - // let vdevice = if bus != bus_pre || device != device_pre { - // // New bus or new device, increment device counter - // if bus != bus_pre { - // vdevice_pre = 0; - // } else { - // vdevice_pre += 1; - // } - // vdevice_pre - // } else { - // // Same bus and device, keep the same virtual device number - // vdevice_pre - // }; - - // let vbdf = Bdf::new(bdf.domain(), vbus, vdevice, vfunction); - - // device_pre = device; - // bus_pre = bus; + let bus = bdf.bus(); + let device = bdf.device(); + let function = bdf.function(); + + /* + * vfunction = if (bus != bus_pre || device != device_pre) && function != 0 + * In practice, remapping is performed only for new devices whose function is not 0; + * however, the check for function != 0 does not affect the final result. + */ + let vfunction = if bus != bus_pre || device != device_pre { + 0 + } else { + function + }; + + let vbus = if bus > bus_pre { + vbus_pre += 1; + vbus_pre + } else { + vbus_pre + }; + + // Remap device number to be contiguous, starting from 0 + let vdevice = if bus != bus_pre || device != device_pre { + // New bus or new device, increment device counter + if bus != bus_pre { + vdevice_pre = 0; + } else { + vdevice_pre += 1; + } + vdevice_pre + } else { + // Same bus and device, keep the same virtual device number + vdevice_pre + }; + + let vbdf = Bdf::new(bdf.domain(), vbus, vdevice, vfunction); + + device_pre = device; + bus_pre = bus; // TODO: adjust vbdf will cause line interrupt injecet error, so remove it temporarily + #[cfg(not(feature = "dwc_msi"))] let vbdf = bdf; info!("set bdf {:#?} to vbdf {:#?}", bdf, vbdf); @@ -354,6 +375,8 @@ impl Zone { { let mut vdev = dev.read().config_space.clone(); vdev.set_vbdf(vbdf); + let msi_count = vdev.get_msi_count(); + domain_msi_count += msi_count; inner.vpci_bus_mut().insert(vbdf, vdev); } else { // Check if device is already allocated to another zone @@ -361,6 +384,8 @@ impl Zone { dev.set_zone_id(Some(_zone_id as u32)); let mut vdev_inner = dev.read().config_space.clone(); vdev_inner.set_vbdf(vbdf); + let msi_count = vdev_inner.get_msi_count(); + domain_msi_count += msi_count; inner.vpci_bus_mut().insert(vbdf, vdev_inner); } else { warn!( @@ -395,6 +420,48 @@ impl Zone { } } } + + // After processing all devices for this domain, allocate hardware MSI bits + if domain_msi_count > 0 { + #[cfg(all(feature = "dwc_msi", feature = "dwc_pcie"))] + { + // Get the DW MSI domain allocator and allocate hwbit + if let Some(mut domain_lock) = + crate::pci::dwc_msi::get_dwc_msi_domain_mut(target_domain) + { + if let Some(domain_msi) = domain_lock.get_mut(&target_domain) { + let zone_cpu_set = inner.cpu_set(); + let target_cpu = zone_cpu_set.first_cpu().unwrap_or(0); + match domain_msi.allocate_for_cpu(target_cpu, domain_msi_count) { + Ok(hwirq_bit) => { + // Register the MSI info for this domain + inner.vpci_bus_mut().add_msi_count_for_domain( + target_domain, + domain_msi_count, + hwirq_bit, + ); + } + Err(e) => { + warn!( + "Failed to allocate MSI for domain {}: {:?}", + target_domain, e + ); + } + } + } + } + } + + #[cfg(not(feature = "dwc_msi"))] + { + // Without dwc_msi feature, just register without hardware bit allocation + inner.vpci_bus_mut().add_msi_count_for_domain( + target_domain, + domain_msi_count, + 0, // hwirq_bit is 0 when not using dwc_msi + ); + } + } } info!("vpci bus init done\n {:#x?}", inner.vpci_bus()); Ok(()) @@ -427,11 +494,16 @@ impl Zone { } #[cfg(feature = "dwc_pcie")] { + // Encode domain_id into the arg parameter: arg = ecam_base + domain_id + // Since ecam_base is 4KB aligned, its low 12 bits are 0 + // domain_id (0-15) fits in the low bits without interfering + let encoded_arg = + rootcomplex_config.ecam_base as usize + (rootcomplex_config.domain as usize); inner.mmio_region_register( rootcomplex_config.ecam_base as usize, rootcomplex_config.ecam_size as usize, mmio_vpci_handler_dbi, - rootcomplex_config.ecam_base as usize, + encoded_arg, ); let extend_config = platform::ROOT_DWC_ATU_CONFIG diff --git a/src/pci/pci_handler.rs b/src/pci/pci_handler.rs index af3c42b1..c42e9018 100644 --- a/src/pci/pci_handler.rs +++ b/src/pci/pci_handler.rs @@ -14,12 +14,19 @@ // Authors: // +#[cfg(all(feature = "dwc_pcie", feature = "pci_init_delay"))] +use alloc::collections::btree_map::BTreeMap; use alloc::string::String; +#[cfg(all(feature = "dwc_pcie", feature = "pci_init_delay"))] +use spin::Lazy; +#[cfg(all(feature = "dwc_pcie", feature = "pci_init_delay"))] +use spin::Mutex; use crate::cpu_data::this_zone; use crate::error::HvResult; -use crate::memory::MMIOAccess; +use crate::memory::{mmio_perform_access, MMIOAccess}; use crate::memory::{GuestPhysAddr, HostPhysAddr, MemFlags, MemoryRegion}; +use crate::pci::pci_struct::CapabilityType; use crate::zone::is_this_root_zone; use super::pci_access::{BridgeField, EndpointField, HeaderType, PciField, PciMemType}; @@ -32,20 +39,28 @@ use super::PciConfigAddress; use crate::zone::this_zone_id; #[cfg(feature = "dwc_pcie")] -use crate::{ - memory::mmio_perform_access, - pci::config_accessors::{ - dwc::DwcConfigRegionBackend, - dwc_atu::{ - AtuType, AtuUnroll, ATU_BASE, ATU_ENABLE_BIT, ATU_REGION_SIZE, PCIE_ATU_UNR_LIMIT, - PCIE_ATU_UNR_LOWER_BASE, PCIE_ATU_UNR_LOWER_TARGET, PCIE_ATU_UNR_REGION_CTRL1, - PCIE_ATU_UNR_REGION_CTRL2, PCIE_ATU_UNR_UPPER_BASE, PCIE_ATU_UNR_UPPER_LIMIT, - PCIE_ATU_UNR_UPPER_TARGET, - }, - PciRegionMmio, +use crate::pci::config_accessors::{ + dwc::DwcConfigRegionBackend, + dwc_atu::{ + AtuType, AtuUnroll, ATU_BASE, ATU_ENABLE_BIT, ATU_REGION_SIZE, PCIE_ATU_UNR_LIMIT, + PCIE_ATU_UNR_LOWER_BASE, PCIE_ATU_UNR_LOWER_TARGET, PCIE_ATU_UNR_REGION_CTRL1, + PCIE_ATU_UNR_REGION_CTRL2, PCIE_ATU_UNR_UPPER_BASE, PCIE_ATU_UNR_UPPER_LIMIT, + PCIE_ATU_UNR_UPPER_TARGET, }, + PciRegionMmio, }; +#[cfg(all(feature = "dwc_msi", feature = "dwc_pcie"))] +use super::dwc_msi::{ + PCIE_MSI_ADDR_HI, PCIE_MSI_ADDR_LO, PCIE_MSI_INTR0_ENABLE, PCIE_MSI_INTR0_MASK, + PCIE_MSI_INTR0_STATUS, +}; + +#[cfg(not(feature = "dwc_msi"))] +const PCIE_MSI_ADDR_LO: usize = 0x820; +#[cfg(not(feature = "dwc_msi"))] +const PCIE_MSI_INTR0_STATUS: usize = 0x830; + macro_rules! pci_log { ($($arg:tt)*) => { // info!($($arg)*); @@ -146,6 +161,155 @@ fn handle_virt_pci_request( } } +fn handle_cap_access( + dev: ArcRwLockVirtualPciConfigSpace, + offset: PciConfigAddress, + size: usize, + value: usize, + is_write: bool, + is_dev_belong_to_zone: bool, +) -> HvResult> { + // Handle capability region access (offset >= 0x34) + if offset == 0x34 { + // Cap Pointer register (may be accessed as different sizes) + if is_dev_belong_to_zone { + // Direct pass through to hardware + if is_write { + dev.write_hw(offset, size, value)?; + Ok(None) + } else { + Ok(Some(dev.read_hw(offset, size)?)) + } + } else { + // Device not belong to zone, return 0 (no capability) + if is_write { + Ok(None) + } else { + Ok(Some(0)) + } + } + } else { + // Other capability region offsets + // Try to find the capability that contains this offset + let cap_info = dev.with_cap(|capabilities| { + capabilities + .range(..=offset as u64) + .next_back() + .map(|(cap_offset, cap)| (*cap_offset, cap.get_type())) + }); + + if let Some((cap_offset, cap_type)) = cap_info { + let cap_offset = cap_offset as usize; + let relative_offset = offset as usize - cap_offset; + + if cap_type == CapabilityType::Msi { + let vbdf = dev.get_vbdf(); + let _domain_id = vbdf.domain(); + + let is_msi_64 = dev.with_cap(|capabilities| { + capabilities + .get(&(cap_offset as u64)) + .and_then(|cap| cap.with_region(|region| region.read(0x02, 2).ok())) + .map(|ctrl| (ctrl & (1 << 7)) != 0) + .unwrap_or(false) + }); + + let _is_addr_low = matches!(relative_offset, 4 | 5 | 6 | 7); + let _is_addr_high = is_msi_64 && matches!(relative_offset, 8 | 9 | 10 | 11); + let _is_msg_data = if is_msi_64 { + matches!(relative_offset, 12 | 13) + } else { + matches!(relative_offset, 8 | 9) + }; + + #[cfg(all(feature = "dwc_msi", feature = "dwc_pcie"))] + { + if is_write { + if _is_addr_low { + dev.with_msi_info_mut(|msi_info| { + let current = msi_info.msi_doorbell & 0xffffffff00000000; + msi_info.set_doorbell(current | (value as u64)); + }); + let hw_paddr = + crate::pci::dwc_msi::get_domain_doorbell_paddr(_domain_id); + dev.write_hw(offset, size, (hw_paddr & 0xffffffff) as usize)?; + return Ok(None); + } + if _is_addr_high { + dev.with_msi_info_mut(|msi_info| { + let current = msi_info.msi_doorbell & 0xffffffff; + msi_info.set_doorbell(current | ((value as u64) << 32)); + }); + let hw_paddr = + crate::pci::dwc_msi::get_domain_doorbell_paddr(_domain_id); + dev.write_hw(offset, size, ((hw_paddr >> 32) & 0xffffffff) as usize)?; + return Ok(None); + } + if _is_msg_data { + let zone = this_zone(); + let guard = zone.read(); + let vbus = guard.vpci_bus(); + if let Some(domain_msi_info) = vbus.domain_msi_info().get(&_domain_id) { + let hw_value = + (value as u32).wrapping_add(domain_msi_info.hwirq_bit); + dev.write_hw(offset, size, hw_value as usize)?; + } else { + dev.write_hw(offset, size, value)?; + } + return Ok(None); + } + } else { + if _is_addr_low { + let vm_doorbell = dev + .read() + .get_msi_info() + .map(|msi_info| msi_info.msi_doorbell) + .unwrap_or(0); + return Ok(Some((vm_doorbell & 0xffffffff) as usize)); + } + if _is_addr_high { + let vm_doorbell = dev + .read() + .get_msi_info() + .map(|msi_info| msi_info.msi_doorbell) + .unwrap_or(0); + return Ok(Some(((vm_doorbell >> 32) & 0xffffffff) as usize)); + } + if _is_msg_data { + let hw_value = dev.read_hw(offset, size)?; + let zone = this_zone(); + let guard = zone.read(); + let vbus = guard.vpci_bus(); + if let Some(domain_msi_info) = vbus.domain_msi_info().get(&_domain_id) { + let hwirq_bit = domain_msi_info.hwirq_bit; + let hw_vec = hw_value as u32; + let virq_bit = if hw_vec >= hwirq_bit { + hw_vec - hwirq_bit + } else { + hw_vec + }; + return Ok(Some(virq_bit as usize)); + } + return Ok(Some(hw_value)); + } + } + } + } + + // Direct pass through to hardware for all cap access + if is_write { + dev.write_hw(offset, size, value)?; + Ok(None) + } else { + Ok(Some(dev.read_hw(offset, size)?)) + } + } else { + // No capability found at this offset + Ok(None) + } + } +} + fn handle_endpoint_access( dev: ArcRwLockVirtualPciConfigSpace, field: EndpointField, @@ -208,6 +372,30 @@ fn handle_endpoint_access( * as previously described */ let bar_type = dev.with_bar_ref(slot, |bar| bar.get_type()); + + // Check if this BAR contains MSIX table (only when dwc_msi feature is enabled) + #[cfg(all(feature = "dwc_msi", feature = "dwc_pcie"))] + let is_msix_bar = { + let msix_check_slot = if bar_type == PciMemType::Mem64High && slot > 0 { + slot - 1 + } else { + slot + }; + + dev.read() + .get_msi_info() + .and_then(|msi_info| { + msi_info + .msix_info + .as_ref() + .map(|msix| msix.bar_id == msix_check_slot as u8) + }) + .unwrap_or(false) + }; + + #[cfg(not(feature = "dwc_msi"))] + let is_msix_bar = false; + if bar_type != PciMemType::default() { if is_write { if is_direct && is_root { @@ -225,6 +413,8 @@ fn handle_endpoint_access( | (bar_type == PciMemType::Mem64High) | (bar_type == PciMemType::Io) { + let old_vaddr = + dev.with_bar_ref(slot, |bar| bar.get_virtual_value64()) & !0xf; let new_vaddr = { if bar_type == PciMemType::Mem64High { /* last 4bit is flag, not address and need ignore @@ -254,6 +444,90 @@ fn handle_endpoint_access( if bar_type == PciMemType::Mem64High { dev.with_bar_ref_mut(slot - 1, |bar| bar.set_value(new_vaddr)); } + + let paddr = { + let raw = dev.with_bar_ref(slot, |bar| bar.get_value64()) + as HostPhysAddr; + if bar_type == PciMemType::Io { + raw & !0x3 + } else { + raw & !0xf + } + }; + + if is_msix_bar { + let msix_slot = if bar_type == PciMemType::Mem64High { + slot - 1 + } else { + slot + }; + dev.with_msi_info_mut(|msi_info| { + if let Some(msix) = msi_info.msix_info.as_mut() { + if msix.bar_id as usize == msix_slot { + msix.bar_paddr = paddr as u64; + } + } + }); + } + + let bar_size = { + let size = dev.with_bar_ref(slot, |bar| bar.get_size()); + if crate::memory::addr::is_aligned(size as usize) { + size + } else { + crate::memory::PAGE_SIZE as u64 + } + }; + let new_vaddr_aligned = + if !crate::memory::addr::is_aligned(new_vaddr as usize) { + crate::memory::addr::align_up(new_vaddr as usize) as u64 + } else { + new_vaddr as u64 + }; + + let zone = this_zone(); + let mut guard = zone.write(); + + if is_msix_bar { + guard.mmio_region_remove(old_vaddr as GuestPhysAddr); + guard.mmio_region_register( + new_vaddr_aligned as GuestPhysAddr, + bar_size as usize, + mmio_msix_table_handler, + paddr as usize, + ); + } else { + let gpm = guard.gpm_mut(); + if !gpm + .try_delete( + old_vaddr.try_into().unwrap(), + bar_size as usize, + ) + .is_ok() + {} + gpm.try_insert_quiet(MemoryRegion::new_with_offset_mapper( + new_vaddr_aligned as GuestPhysAddr, + paddr as HostPhysAddr, + bar_size as _, + MemFlags::READ | MemFlags::WRITE, + ))?; + } + drop(guard); + #[cfg(target_arch = "aarch64")] + unsafe { + core::arch::asm!("isb"); + core::arch::asm!("tlbi vmalls12e1is"); + core::arch::asm!("dsb nsh"); + } + #[cfg(all(target_arch = "x86_64", feature = "intel_vtd"))] + { + let vbdf = dev.get_vbdf(); + crate::device::iommu::flush( + this_zone_id(), + vbdf.bus, + (vbdf.device << 3) + vbdf.function, + ); + } } } } else if is_dev_belong_to_zone { @@ -293,8 +567,30 @@ fn handle_endpoint_access( }); } - let paddr = - dev.with_bar_ref(slot, |bar| bar.get_value64()) as HostPhysAddr; + let paddr = { + let raw = dev.with_bar_ref(slot, |bar| bar.get_value64()) + as HostPhysAddr; + if bar_type == PciMemType::Io { + raw & !0x3 + } else { + raw & !0xf + } + }; + + if is_msix_bar { + dev.with_msi_info_mut(|msi_info| { + if let Some(msix) = msi_info.msix_info.as_mut() { + let msix_slot = if bar_type == PciMemType::Mem64High { + slot - 1 + } else { + slot + }; + if msix.bar_id as usize == msix_slot { + msix.bar_paddr = paddr as u64; + } + } + }); + } let bar_size = { let size = dev.with_bar_ref(slot, |bar| bar.get_size()); if crate::memory::addr::is_aligned(size as usize) { @@ -312,20 +608,37 @@ fn handle_endpoint_access( let zone = this_zone(); let mut guard = zone.write(); - let gpm = guard.gpm_mut(); - if !gpm - .try_delete(old_vaddr.try_into().unwrap(), bar_size as usize) - .is_ok() - { - // warn!("delete bar {}: can not found 0x{:x}", slot, old_vaddr); + if is_msix_bar { + // Remove old MSIX handler if it exists + guard.mmio_region_remove(old_vaddr as GuestPhysAddr); + // Register new MSIX handler at new address + guard.mmio_region_register( + new_vaddr as GuestPhysAddr, + bar_size as usize, + mmio_msix_table_handler, + paddr as usize, + ); + } else { + // Delete old gpm mapping if it exists + let gpm = guard.gpm_mut(); + if !gpm + .try_delete( + old_vaddr.try_into().unwrap(), + bar_size as usize, + ) + .is_ok() + { + // warn!("delete bar {}: can not found 0x{:x}", slot, old_vaddr); + } + // Insert new gpm mapping at new address + gpm.try_insert_quiet(MemoryRegion::new_with_offset_mapper( + new_vaddr as GuestPhysAddr, + paddr as HostPhysAddr, + bar_size as _, + MemFlags::READ | MemFlags::WRITE, + ))?; } - gpm.try_insert_quiet(MemoryRegion::new_with_offset_mapper( - new_vaddr as GuestPhysAddr, - paddr as HostPhysAddr, - bar_size as _, - MemFlags::READ | MemFlags::WRITE, - ))?; drop(guard); /* after update gpm, mem barrier is needed */ @@ -411,76 +724,95 @@ fn handle_endpoint_access( configvalue.set_rom_value(value as u32); }); - if value & 0xfffff800 != 0xfffff800 { + // Check if this is size probe (all 1s in BA field, bits 31-11) + let is_size_probe = (value & 0xfffff800) == 0xfffff800; + // Check if ROM enable bit (bit 0) is set + let rom_enabled = (value & 0x1) != 0; + + if !is_size_probe { let old_vaddr = dev.with_rom_ref(|rom| rom.get_virtual_value64()) & !0xf; let new_vaddr = (value as u64) & !0xf; - dev.with_rom_ref_mut(|rom| rom.set_virtual_value(new_vaddr)); + // Only perform mapping operations if ROM enable bit is set + if rom_enabled { + // set new_value not new_vaddr, because `set_virtual_value` will not add enable flag automatically + dev.with_rom_ref_mut(|rom| rom.set_virtual_value(value as _)); + + // Write to hardware with enable bit set + // Get the current ROM value from hardware and set bit 0 + // And not to use rom.set_value() + let hw_value = dev.with_rom_ref(|rom| rom.get_value64()); + let hw_value_enabled = hw_value | 0x1; // Set enable bit + dev.write_hw( + field.to_offset() as PciConfigAddress, + field.size(), + hw_value_enabled as usize, + )?; + dev.with_rom_ref_mut(|rom| rom.set_value(hw_value_enabled)); - let paddr = if is_root { - dev.with_rom_ref_mut(|rom| rom.set_value(new_vaddr)); - new_vaddr as HostPhysAddr - } else { - dev.with_rom_ref(|rom| rom.get_value64()) as HostPhysAddr - }; + let paddr = + dev.with_rom_ref(|rom| rom.get_value64()) as HostPhysAddr; - let rom_size = { - let size = dev.with_rom_ref(|rom| rom.get_size()); - if crate::memory::addr::is_aligned(size as usize) { - size - } else { - crate::memory::PAGE_SIZE as u64 - } - }; - let new_vaddr = if !crate::memory::addr::is_aligned(new_vaddr as usize) - { - crate::memory::addr::align_up(new_vaddr as usize) as u64 - } else { - new_vaddr as u64 - }; + let rom_size = { + let size = dev.with_rom_ref(|rom| rom.get_size()); + if crate::memory::addr::is_aligned(size as usize) { + size + } else { + crate::memory::PAGE_SIZE as u64 + } + }; + let new_vaddr_aligned = + if !crate::memory::addr::is_aligned(new_vaddr as usize) { + crate::memory::addr::align_up(new_vaddr as usize) as u64 + } else { + new_vaddr as u64 + }; - let zone = this_zone(); - let mut guard = zone.write(); - let gpm = guard.gpm_mut(); + let zone = this_zone(); + let mut guard = zone.write(); + let gpm = guard.gpm_mut(); - if !gpm - .try_delete(old_vaddr.try_into().unwrap(), rom_size as usize) - .is_ok() - { - // warn!("delete rom bar: can not found 0x{:x}", old_vaddr); - } - gpm.try_insert_quiet(MemoryRegion::new_with_offset_mapper( - new_vaddr as GuestPhysAddr, - paddr as HostPhysAddr, - rom_size as _, - MemFlags::READ | MemFlags::WRITE, - ))?; - drop(guard); - /* after update gpm, mem barrier is needed - */ - #[cfg(target_arch = "aarch64")] - unsafe { - core::arch::asm!("isb"); - core::arch::asm!("tlbi vmalls12e1is"); - core::arch::asm!("dsb nsh"); - } - /* after update gpm, need to flush iommu table - * in x86_64 - */ - #[cfg(all(target_arch = "x86_64", feature = "intel_vtd"))] - { - let vbdf = dev.get_vbdf(); - crate::device::iommu::flush( - this_zone_id(), - vbdf.bus, - (vbdf.device << 3) + vbdf.function, - ); - } - #[cfg(target_arch = "riscv64")] - unsafe { - // TOOD: add remote fence support (using sbi rfence spec?) - core::arch::asm!("hfence.gvma"); + if !gpm + .try_delete(old_vaddr.try_into().unwrap(), rom_size as usize) + .is_ok() + { + // warn!("delete rom bar: can not found 0x{:x}", old_vaddr); + } + gpm.try_insert_quiet(MemoryRegion::new_with_offset_mapper( + new_vaddr_aligned as GuestPhysAddr, + paddr as HostPhysAddr, + rom_size as _, + MemFlags::READ | MemFlags::WRITE, + ))?; + drop(guard); + /* after update gpm, mem barrier is needed + */ + #[cfg(target_arch = "aarch64")] + unsafe { + core::arch::asm!("isb"); + core::arch::asm!("tlbi vmalls12e1is"); + core::arch::asm!("dsb nsh"); + } + /* after update gpm, need to flush iommu table + * in x86_64 + */ + #[cfg(all(target_arch = "x86_64", feature = "intel_vtd"))] + { + let vbdf = dev.get_vbdf(); + crate::device::iommu::flush( + this_zone_id(), + vbdf.bus, + (vbdf.device << 3) + vbdf.function, + ); + } + #[cfg(target_arch = "riscv64")] + unsafe { + // TOOD: add remote fence support (using sbi rfence spec?) + core::arch::asm!("hfence.gvma"); + } + } else { + // ROM disabled } } } @@ -515,102 +847,605 @@ fn handle_endpoint_access( } fn handle_pci_bridge_access( - _dev: ArcRwLockVirtualPciConfigSpace, - _field: BridgeField, - _is_write: bool, -) -> HvResult> { - Ok(None) -} - -/* - * is_direct: if true, root can allocate resource for device belonging - * to ohter zone but can't drive it - * is_root: if the access is from the root zone - * is_dev_belong_to_zone: if the access is from the device that belongs to the zone - */ -fn handle_config_space_access( dev: ArcRwLockVirtualPciConfigSpace, - mmio: &mut MMIOAccess, - offset: PciConfigAddress, + field: BridgeField, + value: usize, + is_write: bool, is_direct: bool, is_root: bool, is_dev_belong_to_zone: bool, -) -> HvResult { - let is_write = mmio.is_write; +) -> HvResult> { + match field { + BridgeField::Bar(slot) => { + let bar_type = dev.with_bar_ref(slot, |bar| bar.get_type()); - // the lenth of access and control bits are limited by BIT_LENTH - if (offset as usize) >= BIT_LENTH { - warn!("invalid pci offset {:#x}", offset); - if !is_write { - mmio.value = 0; - } - return Ok(()); - } + // Check if this BAR contains MSIX table (only when dwc_msi feature is enabled) + #[cfg(all(feature = "dwc_msi", feature = "dwc_pcie"))] + let is_msix_bar = { + let msix_check_slot = if bar_type == PciMemType::Mem64High && slot > 0 { + slot - 1 + } else { + slot + }; - let size = mmio.size; - let value = mmio.value; + dev.read() + .get_msi_info() + .and_then(|msi_info| { + msi_info + .msix_info + .as_ref() + .map(|msix| msix.bar_id == msix_check_slot as u8) + }) + .unwrap_or(false) + }; - let vbdf = dev.get_bdf(); - let dev_type = dev.get_dev_type(); + #[cfg(not(feature = "dwc_msi"))] + let is_msix_bar = false; - if is_root || is_dev_belong_to_zone { - match dev.access(offset, size) { - false => { - // Hardware access path - pci_log!( - "hw vbdf {:#?} reg 0x{:x} try {} {}", - vbdf, - offset, - if is_write { "write" } else { "read" }, - if is_write { - format!("0x{:x}", mmio.value) - } else { - String::new() - } - ); + if bar_type != PciMemType::default() { if is_write { - dev.write_hw(offset, size, value)?; - } else { - mmio.value = dev.read_hw(offset, size).unwrap(); - } - } - true => { - // Emulation access path - pci_log!( - "emu vbdf {:#?} reg 0x{:x} try {} {}", - vbdf, - offset, - if is_write { "write" } else { "read" }, - if is_write { - format!(" 0x{:x}", mmio.value) - } else { - String::new() - } + if is_direct && is_root { + // direct mode and root zone, update resources directly + dev.with_config_value_mut(|configvalue| { + configvalue.set_bar_value(slot, value as u32); + }); + if (value & 0xfffffff0) != 0xfffffff0 { + dev.write_hw( + field.to_offset() as PciConfigAddress, + field.size(), + value, + )?; + if (bar_type == PciMemType::Mem32) + | (bar_type == PciMemType::Mem64High) + | (bar_type == PciMemType::Io) + { + let old_vaddr = + dev.with_bar_ref(slot, |bar| bar.get_virtual_value64()) & !0xf; + let new_vaddr = { + if bar_type == PciMemType::Mem64High { + let low_value = dev + .with_config_value(|cv| cv.get_bar_value(slot - 1)) + as u64; + let high_value = (value as u32 as u64) << 32; + (low_value | high_value) & !0xf + } else { + (value as u64) & !0xf + } + }; + + // set virt_value + dev.with_bar_ref_mut(slot, |bar| bar.set_virtual_value(new_vaddr)); + if bar_type == PciMemType::Mem64High { + dev.with_bar_ref_mut(slot - 1, |bar| { + bar.set_virtual_value(new_vaddr) + }); + } + + // set value + dev.with_bar_ref_mut(slot, |bar| bar.set_value(new_vaddr)); + if bar_type == PciMemType::Mem64High { + dev.with_bar_ref_mut(slot - 1, |bar| bar.set_value(new_vaddr)); + } + + let paddr = { + let raw = dev.with_bar_ref(slot, |bar| bar.get_value64()) + as HostPhysAddr; + if bar_type == PciMemType::Io { + raw & !0x3 + } else { + raw & !0xf + } + }; + + if is_msix_bar { + let msix_slot = if bar_type == PciMemType::Mem64High { + slot - 1 + } else { + slot + }; + dev.with_msi_info_mut(|msi_info| { + if let Some(msix) = msi_info.msix_info.as_mut() { + if msix.bar_id as usize == msix_slot { + msix.bar_paddr = paddr as u64; + } + } + }); + } + + let bar_size = { + let size = dev.with_bar_ref(slot, |bar| bar.get_size()); + if crate::memory::addr::is_aligned(size as usize) { + size + } else { + crate::memory::PAGE_SIZE as u64 + } + }; + let new_vaddr_aligned = + if !crate::memory::addr::is_aligned(new_vaddr as usize) { + crate::memory::addr::align_up(new_vaddr as usize) as u64 + } else { + new_vaddr as u64 + }; + + let zone = this_zone(); + let mut guard = zone.write(); + + if is_msix_bar { + guard.mmio_region_remove(old_vaddr as GuestPhysAddr); + guard.mmio_region_register( + new_vaddr_aligned as GuestPhysAddr, + bar_size as usize, + mmio_msix_table_handler, + paddr as usize, + ); + } else { + let gpm = guard.gpm_mut(); + if !gpm + .try_delete( + old_vaddr.try_into().unwrap(), + bar_size as usize, + ) + .is_ok() + {} + gpm.try_insert_quiet(MemoryRegion::new_with_offset_mapper( + new_vaddr_aligned as GuestPhysAddr, + paddr as HostPhysAddr, + bar_size as _, + MemFlags::READ | MemFlags::WRITE, + ))?; + } + drop(guard); + #[cfg(target_arch = "aarch64")] + unsafe { + core::arch::asm!("isb"); + core::arch::asm!("tlbi vmalls12e1is"); + core::arch::asm!("dsb nsh"); + } + #[cfg(all(target_arch = "x86_64", feature = "intel_vtd"))] + { + let vbdf = dev.get_vbdf(); + crate::device::iommu::flush( + this_zone_id(), + vbdf.bus, + (vbdf.device << 3) + vbdf.function, + ); + } + } + } + } else if is_dev_belong_to_zone { + // normal mode, update virt resources + dev.with_config_value_mut(|configvalue| { + configvalue.set_bar_value(slot, value as u32); + }); + if (value & 0xfffffff0) != 0xfffffff0 { + if (bar_type == PciMemType::Mem32) + | (bar_type == PciMemType::Mem64High) + | (bar_type == PciMemType::Io) + { + let old_vaddr = + dev.with_bar_ref(slot, |bar| bar.get_virtual_value64()) & !0xf; + let new_vaddr = { + if bar_type == PciMemType::Mem64High { + let low_value = dev + .with_config_value(|cv| cv.get_bar_value(slot - 1)) + as u64; + let high_value = (value as u32 as u64) << 32; + (low_value | high_value) & !0xf + } else { + (value as u64) & !0xf + } + }; + + dev.with_bar_ref_mut(slot, |bar| bar.set_virtual_value(new_vaddr)); + if bar_type == PciMemType::Mem64High { + dev.with_bar_ref_mut(slot - 1, |bar| { + bar.set_virtual_value(new_vaddr) + }); + } + + let paddr = { + let raw = dev.with_bar_ref(slot, |bar| bar.get_value64()) + as HostPhysAddr; + if bar_type == PciMemType::Io { + raw & !0x3 + } else { + raw & !0xf + } + }; + + if is_msix_bar { + dev.with_msi_info_mut(|msi_info| { + if let Some(msix) = msi_info.msix_info.as_mut() { + let msix_slot = if bar_type == PciMemType::Mem64High { + slot - 1 + } else { + slot + }; + if msix.bar_id as usize == msix_slot { + msix.bar_paddr = paddr as u64; + } + } + }); + } + let bar_size = { + let size = dev.with_bar_ref(slot, |bar| bar.get_size()); + if crate::memory::addr::is_aligned(size as usize) { + size + } else { + crate::memory::PAGE_SIZE as u64 + } + }; + let new_vaddr_aligned = + if !crate::memory::addr::is_aligned(new_vaddr as usize) { + crate::memory::addr::align_up(new_vaddr as usize) as u64 + } else { + new_vaddr as u64 + }; + + let zone = this_zone(); + let mut guard = zone.write(); + + if is_msix_bar { + // Remove old MSIX handler if it exists + guard.mmio_region_remove(old_vaddr as GuestPhysAddr); + // Register new MSIX handler at new address + guard.mmio_region_register( + new_vaddr_aligned as GuestPhysAddr, + bar_size as usize, + mmio_msix_table_handler, + paddr as usize, + ); + } else { + // Delete old gpm mapping if it exists + let gpm = guard.gpm_mut(); + if !gpm + .try_delete( + old_vaddr.try_into().unwrap(), + bar_size as usize, + ) + .is_ok() + { + // warn!("delete bar {}: can not found 0x{:x}", slot, old_vaddr); + } + // Insert new gpm mapping at new address + gpm.try_insert_quiet(MemoryRegion::new_with_offset_mapper( + new_vaddr_aligned as GuestPhysAddr, + paddr as HostPhysAddr, + bar_size as _, + MemFlags::READ | MemFlags::WRITE, + ))?; + } + drop(guard); + /* after update gpm, mem barrier is needed + */ + #[cfg(target_arch = "aarch64")] + unsafe { + core::arch::asm!("isb"); + core::arch::asm!("tlbi vmalls12e1is"); + core::arch::asm!("dsb nsh"); + } + /* after update gpm, need to flush iommu table + * in x86_64 + */ + #[cfg(all(target_arch = "x86_64", feature = "intel_vtd"))] + { + let vbdf = dev.get_vbdf(); + crate::device::iommu::flush( + this_zone_id(), + vbdf.bus, + (vbdf.device << 3) + vbdf.function, + ); + } + } + } + } + Ok(None) + } else { + // read bar + if (dev.with_config_value(|configvalue| configvalue.get_bar_value(slot)) + & 0xfffffff0) + == 0xfffffff0 + { + /* + * tmp_value being 0xFFFF_FFFF means that Linux is attempting to determine the BAR size. + * The value of tmp_value is used directly here because Linux will rewrite this register later, + * so the Hvisor does not need to preserve any additional state. + */ + Ok(Some( + dev.with_bar_ref(slot, |bar| bar.get_size_with_flag()) as usize + )) + } else { + Ok(Some( + dev.with_config_value(|configvalue| configvalue.get_bar_value(slot)) + as usize, + )) + } + } + } else { + Ok(None) + } + } + BridgeField::ExpansionRomBar => { + // rom is same with bar + let rom_type = dev.with_rom_ref(|rom| rom.get_type()); + if rom_type == PciMemType::Rom { + if is_write { + if is_direct && is_root { + dev.with_config_value_mut(|configvalue| { + configvalue.set_rom_value(value as u32); + }); + if value & 0xfffff800 != 0xfffff800 { + dev.write_hw( + field.to_offset() as PciConfigAddress, + field.size(), + value, + )?; + + let new_vaddr = (value as u64) & !0xf; + + // set virt_value + dev.with_rom_ref_mut(|rom| rom.set_virtual_value(new_vaddr)); + + // set value + dev.with_rom_ref_mut(|rom| rom.set_value(new_vaddr)); + } + } else if is_dev_belong_to_zone { + // normal mode, update virt resources + dev.with_config_value_mut(|configvalue| { + configvalue.set_rom_value(value as u32); + }); + + // Check if this is size probe (all 1s in BA field, bits 31-11) + let is_size_probe = (value & 0xfffff800) == 0xfffff800; + // Check if ROM enable bit (bit 0) is set + let rom_enabled = (value & 0x1) != 0; + + if !is_size_probe { + let old_vaddr = + dev.with_rom_ref(|rom| rom.get_virtual_value64()) & !0xf; + let new_vaddr = (value as u64) & !0xf; + + // Only perform mapping operations if ROM enable bit is set + if rom_enabled { + // set new_value not new_vaddr, because `set_virtual_value` will not add enable flag automatically + dev.with_rom_ref_mut(|rom| rom.set_virtual_value(value as _)); + + // Write to hardware with enable bit set + // Get the current ROM value from hardware and set bit 0 + // And not to use rom.set_value() + let hw_value = dev.with_rom_ref(|rom| rom.get_value64()); + let hw_value_enabled = hw_value | 0x1; // Set enable bit + dev.write_hw( + field.to_offset() as PciConfigAddress, + field.size(), + hw_value_enabled as usize, + )?; + dev.with_rom_ref_mut(|rom| rom.set_value(hw_value_enabled)); + + let paddr = + dev.with_rom_ref(|rom| rom.get_value64()) as HostPhysAddr; + + let rom_size = { + let size = dev.with_rom_ref(|rom| rom.get_size()); + if crate::memory::addr::is_aligned(size as usize) { + size + } else { + crate::memory::PAGE_SIZE as u64 + } + }; + let new_vaddr_aligned = + if !crate::memory::addr::is_aligned(new_vaddr as usize) { + crate::memory::addr::align_up(new_vaddr as usize) as u64 + } else { + new_vaddr as u64 + }; + + let zone = this_zone(); + let mut guard = zone.write(); + let gpm = guard.gpm_mut(); + + if !gpm + .try_delete(old_vaddr.try_into().unwrap(), rom_size as usize) + .is_ok() + { + // warn!("delete rom bar: can not found 0x{:x}", old_vaddr); + } + gpm.try_insert_quiet(MemoryRegion::new_with_offset_mapper( + new_vaddr_aligned as GuestPhysAddr, + paddr as HostPhysAddr, + rom_size as _, + MemFlags::READ | MemFlags::WRITE, + ))?; + drop(guard); + /* after update gpm, mem barrier is needed + */ + #[cfg(target_arch = "aarch64")] + unsafe { + core::arch::asm!("isb"); + core::arch::asm!("tlbi vmalls12e1is"); + core::arch::asm!("dsb nsh"); + } + /* after update gpm, need to flush iommu table + * in x86_64 + */ + #[cfg(all(target_arch = "x86_64", feature = "intel_vtd"))] + { + let vbdf = dev.get_vbdf(); + crate::device::iommu::flush( + this_zone_id(), + vbdf.bus, + (vbdf.device << 3) + vbdf.function, + ); + } + #[cfg(target_arch = "riscv64")] + unsafe { + // TOOD: add remote fence support (using sbi rfence spec?) + core::arch::asm!("hfence.gvma"); + } + } else { + // ROM disabled + } + } + } + Ok(None) + } else { + // read rom bar + if (dev.with_config_value(|configvalue| configvalue.get_rom_value())) + & 0xfffff800 + == 0xfffff800 + { + /* + * config_value being 0xFFFF_FFFF means that Linux is attempting to determine the ROM size. + * The value is used directly here because Linux will rewrite this register later, + * so the Hvisor does not need to preserve any additional state. + */ + Ok(Some( + dev.with_rom_ref(|rom| rom.get_size_with_flag()) as usize + )) + } else { + Ok(Some( + dev.with_config_value(|configvalue| configvalue.get_rom_value()) + as usize, + )) + } + } + } else { + Ok(None) + } + } + _ => Ok(None), + } +} + +/* + * is_direct: if true, root can allocate resource for device belonging + * to ohter zone but can't drive it + * is_root: if the access is from the root zone + * is_dev_belong_to_zone: if the access is from the device that belongs to the zone + */ +fn handle_config_space_access( + dev: ArcRwLockVirtualPciConfigSpace, + mmio: &mut MMIOAccess, + offset: PciConfigAddress, + is_direct: bool, + is_root: bool, + is_dev_belong_to_zone: bool, +) -> HvResult { + let is_write = mmio.is_write; + + // the lenth of access and control bits are limited by BIT_LENTH + if (offset as usize) >= BIT_LENTH { + warn!("invalid pci offset {:#x}", offset); + if !is_write { + mmio.value = 0; + } + return Ok(()); + } + + let size = mmio.size; + let value = mmio.value; + + let vbdf = dev.get_bdf(); + let dev_type = dev.get_dev_type(); + + if !is_write && dev.get_config_type() == HeaderType::Endpoint && offset == 0x150 && size == 4 { + // Hardcode for test: skip SR-IOV cap (0x160) by rewriting next cap ptr to 0x1a0. + mmio.value = dev.read_hw(offset, size).unwrap(); + mmio.value = mmio.value & 0x00ff_ffff; + mmio.value += 0x1a00_0000; + return Ok(()); + } + + if is_root || is_dev_belong_to_zone { + match dev.access(offset, size) { + false => { + // Hardware access path + pci_log!( + "hw vbdf {:#?} reg 0x{:x} try {} {}", + vbdf, + offset, + if is_write { "write" } else { "read" }, + if is_write { + format!("0x{:x}", mmio.value) + } else { + String::new() + } + ); + if is_write { + dev.write_hw(offset, size, value)?; + } else { + mmio.value = dev.read_hw(offset, size).unwrap(); + } + } + true => { + // Emulation access path + pci_log!( + "emu vbdf {:#?} reg 0x{:x} try {} {}", + vbdf, + offset, + if is_write { "write" } else { "read" }, + if is_write { + format!(" 0x{:x}", mmio.value) + } else { + String::new() + } ); match dev_type { VpciDevType::Physical => { let config_type = dev.get_config_type(); match config_type { HeaderType::Endpoint => { - if let Some(val) = handle_endpoint_access( - dev, - EndpointField::from(offset as usize, size), - value, - is_write, - is_direct, - is_root, - is_dev_belong_to_zone, - )? { - mmio.value = val; + // Check if this is capability region access (offset >= 0x40) + if (offset >= 0x40 && offset < 0x100) || (offset == 0x34) { + if let Some(val) = handle_cap_access( + dev, + offset, + size, + value, + is_write, + is_dev_belong_to_zone, + )? { + mmio.value = val; + } + } else { + if let Some(val) = handle_endpoint_access( + dev, + EndpointField::from(offset as usize, size), + value, + is_write, + is_direct, + is_root, + is_dev_belong_to_zone, + )? { + mmio.value = val; + } } } HeaderType::PciBridge => { - if let Some(val) = handle_pci_bridge_access( - dev, - BridgeField::from(offset as usize, size), - is_write, - )? { - mmio.value = val; + // Check if this is capability region access (offset >= 0x40) + if (offset >= 0x40 && offset < 0x100) || (offset == 0x34) { + if let Some(val) = handle_cap_access( + dev, + offset, + size, + value, + is_write, + is_dev_belong_to_zone, + )? { + mmio.value = val; + } + } else { + if let Some(val) = handle_pci_bridge_access( + dev, + BridgeField::from(offset as usize, size), + value, + is_write, + is_direct, + is_root, + is_dev_belong_to_zone, + )? { + mmio.value = val; + } } } _ => { @@ -751,10 +1586,38 @@ pub fn mmio_dwc_cfg_handler(mmio: &mut MMIOAccess, _base: usize) -> HvResult { let dbi_region = PciRegionMmio::new(dbi_base, dbi_size); let dbi_backend = DwcConfigRegionBackend::new(dbi_region); - // warn!("atu config {:#?}", atu); + let pci_target = atu.pci_target(); + let target_bus = ((pci_target >> 24) & 0xff) as u8; + let target_device = ((pci_target >> 19) & 0x1f) as u8; + let target_function = ((pci_target >> 16) & 0x7) as u8; + + let mapped_target = { + let zone_guard = zone.read(); + let vbus = zone_guard.vpci_bus(); + vbus.devs_ref().values().find_map(|dev| { + let vbdf = dev.get_vbdf(); + if vbdf.bus() == target_bus + && vbdf.device() == target_device + && vbdf.function() == target_function + { + Some((dev.get_bdf(), vbdf, dev.read().get_base())) + } else { + None + } + }) + }; + + let mut hw_pci_target = pci_target; + if let Some((host_bdf, _, _)) = mapped_target { + hw_pci_target = ((host_bdf.bus() as u64) << 24) + + ((host_bdf.device() as u64) << 19) + + ((host_bdf.function() as u64) << 16); + } - // Call AtuUnroll to program the ATU - AtuUnroll::dw_pcie_prog_outbound_atu_unroll(&dbi_backend, &atu)?; + // Program hardware ATU with translated host target when remap exists. + let mut hw_atu = atu; + hw_atu.set_pci_target(hw_pci_target); + AtuUnroll::dw_pcie_prog_outbound_atu_unroll(&dbi_backend, &hw_atu)?; } let offset = (mmio.address & 0xfff) as PciConfigAddress; @@ -809,180 +1672,542 @@ pub fn mmio_dwc_cfg_handler(mmio: &mut MMIOAccess, _base: usize) -> HvResult { pub fn mmio_vpci_handler_dbi(mmio: &mut MMIOAccess, _base: usize) -> HvResult { // info!("mmio_vpci_handler_dbi {:#x}", mmio.address); - /* 0x0-0x100 is outbound atu0 reg - * 0x100-0x200 is inbound atu0 reg just handle outbound right now - * so MAX is ATU_BASE + ATU_REGION_SIZE/2 - */ - if mmio.address >= ATU_BASE && mmio.address < ATU_BASE + ATU_REGION_SIZE / 2 { - let zone = this_zone(); - let mut guard = zone.write(); - let ecam_base = _base; - let atu_offset = mmio.address - ATU_BASE; + use crate::platform; + + // Decode domain_id and ecam_base from arg: + // arg = ecam_base + domain_id + // Since ecam_base is 4KB aligned (low 12 bits are 0), + // low bits contain domain_id, high bits contain ecam_base + let domain_id = (_base & 0xF) as u8; + let ecam_base = _base - (domain_id as usize); + + #[cfg(all(feature = "pci_init_delay", feature = "dwc_pcie"))] + { + // Delay mode semantics: + // - Before init-done, accesses to non-zero DBI regs are normally passed through. + // - For dwc_msi, MSI_ADDR_LO/HI are intercepted early so VM doorbell writes are cached. + // - Access to DBI reg 0 triggers hvisor PCI init, then normal DBI virtualization continues. + if !is_pci_init_done(domain_id) { + if mmio.address != 0 { + #[cfg(feature = "dwc_msi")] + match mmio.address { + PCIE_MSI_ADDR_LO | PCIE_MSI_ADDR_HI => { + let zone = this_zone(); + let mut guard = zone.write(); + let vbus = guard.vpci_bus_mut(); + + if vbus.domain_msi_info().get(&domain_id).is_none() { + vbus.add_msi_count_for_domain(domain_id, 1, 0); + } + + if let Some(domain_msi_info) = + vbus.domain_msi_info_mut().get_mut(&domain_id) + { + if mmio.is_write { + let vm_doorbell = domain_msi_info.get_vm_doorbell(); + let new_val = if mmio.address == PCIE_MSI_ADDR_LO { + (vm_doorbell & 0xffffffff00000000) | (mmio.value as u64) + } else { + (vm_doorbell & 0xffffffff) | ((mmio.value as u64) << 32) + }; + domain_msi_info.set_vm_doorbell(new_val); + } else { + let vm_doorbell = domain_msi_info.get_vm_doorbell(); + mmio.value = if mmio.address == PCIE_MSI_ADDR_LO { + (vm_doorbell & 0xffffffff) as usize + } else { + ((vm_doorbell >> 32) & 0xffffffff) as usize + }; + } + } + + return Ok(()); + } + _ => {} + } + + mmio_perform_access(ecam_base, mmio); + return Ok(()); + } - // warn!("set atu0 register {:#X} value {:#X}", atu_offset, mmio.value); + let root_config = platform::platform_root_zone_config(); + let num_pci_bus = root_config.num_pci_bus as usize; - let atu = guard - .atu_configs_mut() - .get_atu_by_ecam_mut(ecam_base) - .unwrap(); + crate::pci::pci_config::hvisor_pci_init(&root_config.pci_config[..num_pci_bus])?; - // info!("atu config write {:#?}", atu); + let zone = crate::zone::root_zone(); + let mut inner = zone.write(); + inner.virtual_pci_mmio_init_delay(&root_config.pci_config, num_pci_bus); + inner.guest_pci_init_delay( + 0, + &root_config.alloc_pci_devs, + root_config.num_pci_devs, + &root_config.pci_config, + num_pci_bus, + )?; - if mmio.is_write { - if mmio.size == 4 { + #[cfg(feature = "dwc_msi")] + { + // Why this is inside init-delay only: + // before init-done, VM may have already written MSI_ADDR_LO/HI and those writes were + // cached (virtual doorbell) but did not program final hardware state. + // After hvisor_pci_init() completes, force HW LO/HI to hvisor-allocated doorbell. + // In non-delay mode, writes go through the normal MSI register handler below, + // and first LO/HI writes are translated/synced there, so this extra sync is unnecessary. + let hw_paddr = crate::pci::dwc_msi::get_domain_doorbell_paddr(domain_id); + if hw_paddr != 0 { + let mut hw_lo_write = MMIOAccess { + address: PCIE_MSI_ADDR_LO, + value: (hw_paddr & 0xffffffff) as usize, + size: 4, + is_write: true, + }; + let mut hw_hi_write = MMIOAccess { + address: PCIE_MSI_ADDR_HI, + value: ((hw_paddr >> 32) & 0xffffffff) as usize, + size: 4, + is_write: true, + }; + mmio_perform_access(ecam_base, &mut hw_lo_write); + mmio_perform_access(ecam_base, &mut hw_hi_write); + } + } + + set_pci_init_done(domain_id); + info!( + "Hvisor PCI initialization complete for domain {}", + domain_id + ); + } + } + + // Read extend_config to get io_atu_index + let extend_config = platform::ROOT_DWC_ATU_CONFIG + .iter() + .find(|cfg| cfg.ecam_base == ecam_base as u64); + + if let Some(extend_config) = extend_config { + let io_atu_index = extend_config.io_atu_index as usize; + let atu_base = ATU_BASE + io_atu_index * ATU_REGION_SIZE; + + /* Calculate outbound atu registers address range based on io_atu_index + * Each ATU has: 0x0-0x100 for outbound, 0x100-0x200 for inbound + * We only handle outbound now, so MAX is atu_base + ATU_REGION_SIZE/2 + */ + if mmio.address >= atu_base && mmio.address < atu_base + ATU_REGION_SIZE / 2 { + let zone = this_zone(); + let mut guard = zone.write(); + let atu_offset = mmio.address - atu_base; + + // warn!("set atu{} register {:#X} value {:#X}", io_atu_index, atu_offset, mmio.value); + + let atu = guard + .atu_configs_mut() + .get_atu_by_ecam_mut(ecam_base) + .unwrap(); + + // info!("atu config write {:#?}", atu); + + if mmio.is_write { + if mmio.size == 4 { + match atu_offset { + PCIE_ATU_UNR_REGION_CTRL1 => { + // info!("set atu{} region ctrl1 value {:#X}", io_atu_index, mmio.value); + atu.set_atu_type(AtuType::from_u8((mmio.value & 0xff) as u8)); + } + PCIE_ATU_UNR_REGION_CTRL2 => { + // Enable bit is written here, but we just track it + // The actual enable is handled by the driver + } + PCIE_ATU_UNR_LOWER_BASE => { + // info!("set atu{} lower base value {:#X}", io_atu_index, mmio.value); + atu.set_cpu_base( + (atu.cpu_base() & !0xffffffff) | (mmio.value as PciConfigAddress), + ); + } + PCIE_ATU_UNR_UPPER_BASE => { + // info!("set atu{} upper base value {:#X}", io_atu_index, mmio.value); + atu.set_cpu_base( + (atu.cpu_base() & 0xffffffff) + | ((mmio.value as PciConfigAddress) << 32), + ); + } + PCIE_ATU_UNR_LIMIT => { + // info!("set atu{} limit value {:#X}", io_atu_index, mmio.value); + atu.set_cpu_limit( + (atu.cpu_limit() & !0xffffffff) | (mmio.value as PciConfigAddress), + ); + } + PCIE_ATU_UNR_UPPER_LIMIT => { + // Update the upper 32 bits of cpu_limit + atu.set_cpu_limit( + (atu.cpu_limit() & 0xffffffff) + | ((mmio.value as PciConfigAddress) << 32), + ); + } + PCIE_ATU_UNR_LOWER_TARGET => { + // info!("set atu{} lower target value {:#X}", io_atu_index, mmio.value); + atu.set_pci_target( + (atu.pci_target() & !0xffffffff) | (mmio.value as PciConfigAddress), + ); + } + PCIE_ATU_UNR_UPPER_TARGET => { + // info!("set atu{} upper target value {:#X}", io_atu_index, mmio.value); + atu.set_pci_target( + (atu.pci_target() & 0xffffffff) + | ((mmio.value as PciConfigAddress) << 32), + ); + } + _ => { + warn!( + "invalid atu{} write {:#x} + {:#x}", + io_atu_index, atu_offset, mmio.size + ); + } + } + } else { + warn!("invalid atu{} read size {:#x}", io_atu_index, mmio.size); + } + } else { + // Read from virtual ATU + // warn!("read atu{} {:#x}", io_atu_index, atu_offset); match atu_offset { PCIE_ATU_UNR_REGION_CTRL1 => { - // info!("set atu0 region ctrl1 value {:#X}", mmio.value); - atu.set_atu_type(AtuType::from_u8((mmio.value & 0xff) as u8)); + mmio.value = atu.atu_type() as usize; } PCIE_ATU_UNR_REGION_CTRL2 => { - // Enable bit is written here, but we just track it - // The actual enable is handled by the driver + mmio.value = ATU_ENABLE_BIT as usize; } PCIE_ATU_UNR_LOWER_BASE => { - // info!("set atu0 lower base value {:#X}", mmio.value); - atu.set_cpu_base( - (atu.cpu_base() & !0xffffffff) | (mmio.value as PciConfigAddress), - ); + mmio.value = (atu.cpu_base() & 0xffffffff) as usize; } PCIE_ATU_UNR_UPPER_BASE => { - // info!("set atu0 upper base value {:#X}", mmio.value); - atu.set_cpu_base( - (atu.cpu_base() & 0xffffffff) - | ((mmio.value as PciConfigAddress) << 32), - ); + mmio.value = ((atu.cpu_base() >> 32) & 0xffffffff) as usize; } PCIE_ATU_UNR_LIMIT => { - // info!("set atu0 limit value {:#X}", mmio.value); - atu.set_cpu_limit( - (atu.cpu_limit() & !0xffffffff) | (mmio.value as PciConfigAddress), - ); + let limit_value = (atu.cpu_limit() & 0xffffffff) as usize; + mmio.value = if limit_value == 0 { + atu.limit_hw_value() as usize + } else { + limit_value + }; } PCIE_ATU_UNR_UPPER_LIMIT => { - // Update the upper 32 bits of cpu_limit - atu.set_cpu_limit( - (atu.cpu_limit() & 0xffffffff) - | ((mmio.value as PciConfigAddress) << 32), - ); + let upper_limit = ((atu.cpu_limit() >> 32) & 0xffffffff) as usize; + mmio.value = if upper_limit == 0xffffffff { + atu.upper_limit_hw_value() as usize + } else { + upper_limit + }; } PCIE_ATU_UNR_LOWER_TARGET => { - // info!("set atu0 lower target value {:#X}", mmio.value); - atu.set_pci_target( - (atu.pci_target() & !0xffffffff) | (mmio.value as PciConfigAddress), - ); + mmio.value = (atu.pci_target() & 0xffffffff) as usize; } PCIE_ATU_UNR_UPPER_TARGET => { - // info!("set atu0 upper target value {:#X}", mmio.value); - atu.set_pci_target( - (atu.pci_target() & 0xffffffff) - | ((mmio.value as PciConfigAddress) << 32), - ); + mmio.value = ((atu.pci_target() >> 32) & 0xffffffff) as usize; } _ => { - warn!("invalid atu0 write {:#x} + {:#x}", atu_offset, mmio.size); + warn!("invalid atu{} read {:#x}", io_atu_index, atu_offset); + mmio_perform_access(_base, mmio); } } - } else { - warn!("invalid atu0 read size {:#x}", mmio.size); - } - } else { - // Read from virtual ATU - // warn!("read atu0 {:#x}", atu_offset); - match atu_offset { - PCIE_ATU_UNR_REGION_CTRL1 => { - mmio.value = atu.atu_type() as usize; - } - PCIE_ATU_UNR_REGION_CTRL2 => { - mmio.value = ATU_ENABLE_BIT as usize; - } - PCIE_ATU_UNR_LOWER_BASE => { - mmio.value = (atu.cpu_base() & 0xffffffff) as usize; - } - PCIE_ATU_UNR_UPPER_BASE => { - mmio.value = ((atu.cpu_base() >> 32) & 0xffffffff) as usize; - } - PCIE_ATU_UNR_LIMIT => { - let limit_value = (atu.cpu_limit() & 0xffffffff) as usize; - mmio.value = if limit_value == 0 { - atu.limit_hw_value() as usize - } else { - limit_value - }; - } - PCIE_ATU_UNR_UPPER_LIMIT => { - let upper_limit = ((atu.cpu_limit() >> 32) & 0xffffffff) as usize; - mmio.value = if upper_limit == 0xffffffff { - atu.upper_limit_hw_value() as usize - } else { - upper_limit - }; - } - PCIE_ATU_UNR_LOWER_TARGET => { - mmio.value = (atu.pci_target() & 0xffffffff) as usize; - } - PCIE_ATU_UNR_UPPER_TARGET => { - mmio.value = ((atu.pci_target() >> 32) & 0xffffffff) as usize; - } - _ => { - warn!("invalid atu0 read {:#x}", atu_offset); - mmio_perform_access(_base, mmio); - } } - } - } else if mmio.address > ATU_BASE + ATU_REGION_SIZE / 2 { - mmio_perform_access(_base, mmio); - } else if mmio.address >= BIT_LENTH { - // dbi read - mmio_perform_access(_base, mmio); - } else { - warn!("mmio_vpci_handler_dbi read {:#x}", mmio.address); - let offset = (mmio.address & 0xfff) as PciConfigAddress; - let zone = this_zone(); - let mut is_dev_belong_to_zone = false; + } else if mmio.address > ATU_BASE { + mmio_perform_access(ecam_base, mmio); + } else if mmio.address >= BIT_LENTH + && !(mmio.address >= PCIE_MSI_ADDR_LO && mmio.address <= PCIE_MSI_INTR0_STATUS) + { + // dbi read + mmio_perform_access(ecam_base, mmio); + } else if mmio.address >= PCIE_MSI_ADDR_LO && mmio.address <= PCIE_MSI_INTR0_STATUS { + // Handle MSI registers - virtuize only if dwc_msi feature enabled + #[cfg(all(feature = "dwc_msi", feature = "dwc_pcie"))] + { + // Non-delay path (or delay after init-done) uses this handler for MSI DBI regs. + // LO/HI writes are virtualized and synchronized with hvisor-managed doorbell here. + // Handle MSI registers + let dbi_offset = mmio.address; + let zone = this_zone(); + + let mut guard = zone.write(); + let vbus = guard.vpci_bus_mut(); + + if let Some(domain_msi_info) = vbus.domain_msi_info_mut().get_mut(&domain_id) { + match dbi_offset { + PCIE_MSI_ADDR_LO => { + if mmio.is_write { + // VM writes low 32 bits of doorbell address + let new_doorbell = (domain_msi_info.get_vm_doorbell() + & 0xffffffff00000000) + | (mmio.value as u64); + domain_msi_info.set_vm_doorbell(new_doorbell); + + // Check if hardware doorbell matches hvisor's allocation from DW_MSI_DOMAINS + // Read current hardware ADDR_LO and ADDR_HI to get full doorbell address + let mut hw_hi_mmio = MMIOAccess { + address: PCIE_MSI_ADDR_HI, + value: 0, + size: 4, + is_write: false, + }; + // After VM writes LO, hardware still has old LO value + // We'll use the new LO from VM write and existing HI from hardware + mmio_perform_access(ecam_base, &mut hw_hi_mmio); + let hw_doorbell = + ((hw_hi_mmio.value as u64) << 32) | (mmio.value as u64); + + // Get the authoritative doorbell from DW_MSI_DOMAINS + // Actually vm set the doorbell only when this board doesn't support arch MSI + let hw_paddr = + crate::pci::dwc_msi::get_domain_doorbell_paddr(domain_id); + + // If hardware doorbell doesn't match hvisor's allocation, sync it + if hw_doorbell != hw_paddr && hw_paddr != 0 { + let hw_paddr_lo = (hw_paddr & 0xffffffff) as u32; + let hw_paddr_hi = ((hw_paddr >> 32) & 0xffffffff) as u32; + + // Write hvisor's doorbell LO + let mut hw_lo_write = MMIOAccess { + address: PCIE_MSI_ADDR_LO, + value: hw_paddr_lo as usize, + size: 4, + is_write: true, + }; + mmio_perform_access(ecam_base, &mut hw_lo_write); + + // Write hvisor's doorbell HI (only if needed) + if hw_paddr_hi != (hw_hi_mmio.value as u32) { + let mut hw_hi_write = MMIOAccess { + address: PCIE_MSI_ADDR_HI, + value: hw_paddr_hi as usize, + size: 4, + is_write: true, + }; + mmio_perform_access(ecam_base, &mut hw_hi_write); + } + } + } else { + // Return the low 32 bits of VM doorbell + mmio.value = + (domain_msi_info.get_vm_doorbell() & 0xffffffff) as usize; + } + } + PCIE_MSI_ADDR_HI => { + if mmio.is_write { + // VM writes high 32 bits of doorbell address + let new_doorbell = (domain_msi_info.get_vm_doorbell() & 0xffffffff) + | ((mmio.value as u64) << 32); + domain_msi_info.set_vm_doorbell(new_doorbell); + + // Check if hardware doorbell matches hvisor's allocation from DW_MSI_DOMAINS + // Read current hardware ADDR_LO and ADDR_HI to get full doorbell address + let mut hw_lo_mmio = MMIOAccess { + address: PCIE_MSI_ADDR_LO, + value: 0, + size: 4, + is_write: false, + }; + mmio_perform_access(ecam_base, &mut hw_lo_mmio); + let hw_doorbell = + ((mmio.value as u64) << 32) | (hw_lo_mmio.value as u64); + + // Get the authoritative doorbell from DW_MSI_DOMAINS + let hw_paddr = + crate::pci::dwc_msi::get_domain_doorbell_paddr(domain_id); + + // If hardware doorbell doesn't match hvisor's allocation, sync it + if hw_doorbell != hw_paddr && hw_paddr != 0 { + let hw_paddr_lo = (hw_paddr & 0xffffffff) as u32; + let hw_paddr_hi = ((hw_paddr >> 32) & 0xffffffff) as u32; + + // Write hvisor's doorbell HI + let mut hw_hi_write = MMIOAccess { + address: PCIE_MSI_ADDR_HI, + value: hw_paddr_hi as usize, + size: 4, + is_write: true, + }; + mmio_perform_access(ecam_base, &mut hw_hi_write); + + // Write hvisor's doorbell LO (only if needed) + if hw_paddr_lo != (hw_lo_mmio.value as u32) { + let mut hw_lo_write = MMIOAccess { + address: PCIE_MSI_ADDR_LO, + value: hw_paddr_lo as usize, + size: 4, + is_write: true, + }; + mmio_perform_access(ecam_base, &mut hw_lo_write); + } + } + } else { + // Return the high 32 bits of VM doorbell + mmio.value = ((domain_msi_info.get_vm_doorbell() >> 32) + & 0xffffffff) + as usize; + } + } + PCIE_MSI_INTR0_ENABLE | PCIE_MSI_INTR0_MASK | PCIE_MSI_INTR0_STATUS => { + // All three registers use the same bit shifting and masking logic + let hwirq_bit = domain_msi_info.hwirq_bit; + let vm_mask = domain_msi_info.get_msi_mask(); + + if mmio.is_write { + // VM writes from virqbit 0-based perspective + // Convert to hardware perspective by left-shifting by hwirq_bit + let hw_value_vm = + (mmio.value as u32 & vm_mask).wrapping_shl(hwirq_bit); + + if dbi_offset == PCIE_MSI_INTR0_STATUS { + // Status register: write 1 to clear semantics + // Mask first to ensure VM can only clear its own bits + // No need to read hardware value - just write the mapped bits + // Hardware will clear only the bits we write as 1 + // Other domains' pending interrupts remain unaffected + let mut hw_mmio_write = MMIOAccess { + address: mmio.address, + value: hw_value_vm as usize, + size: 4, + is_write: true, + }; + mmio_perform_access(ecam_base, &mut hw_mmio_write); + } else { + // For ENABLE/MASK registers: need to preserve other domain's bits + // Read current hardware value + let mut hw_mmio = MMIOAccess { + address: mmio.address, + value: 0, + size: 4, + is_write: false, + }; + mmio_perform_access(ecam_base, &mut hw_mmio); + let hw_value = hw_mmio.value as u32; - let base = mmio.address as PciConfigAddress - offset + _base as PciConfigAddress; + // Create mask for this domain's MSI bits + let domain_mask = vm_mask.wrapping_shl(hwirq_bit); - let dev: Option = { - let mut guard = zone.write(); - let vbus = guard.vpci_bus_mut(); - if let Some(dev) = vbus.get_device_by_base(base) { - is_dev_belong_to_zone = true; - Some(dev) - } else { - drop(guard); - // Clone Arc first while holding GLOBAL_PCIE_LIST lock, then release it - // This avoids holding multiple locks simultaneously - let dev_clone = { - let global_pcie_list = GLOBAL_PCIE_LIST.lock(); - global_pcie_list - .values() - .find(|dev| { - let dev_guard = dev.read(); - dev_guard.get_base() == base - }) - .cloned() - }; - dev_clone - } - }; + // Update hardware value: clear domain bits, then set new ones + let new_hw_value = + (hw_value & !domain_mask) | (hw_value_vm & domain_mask); - let dev = match dev { - Some(dev) => dev, - None => { - handle_device_not_found(mmio, offset); - return Ok(()); + let mut hw_mmio_write = MMIOAccess { + address: mmio.address, + value: new_hw_value as usize, + size: 4, + is_write: true, + }; + mmio_perform_access(ecam_base, &mut hw_mmio_write); + } + } else { + // Read and convert from hardware perspective to VM perspective + // Read hardware value + let mut hw_mmio = MMIOAccess { + address: mmio.address, + value: 0, + size: 4, + is_write: false, + }; + mmio_perform_access(ecam_base, &mut hw_mmio); + let hw_value = hw_mmio.value as u32; + + // Right shift to get VM perspective and mask + let vm_value = hw_value.wrapping_shr(hwirq_bit) & vm_mask; + mmio.value = vm_value as usize; + } + } + _ => { + // Other DBI registers + mmio_perform_access(ecam_base, mmio); + } + } + } else { + warn!("No MSI domain info found for domain {}", domain_id); + mmio_perform_access(ecam_base, mmio); + } } - }; - let is_root = is_this_root_zone(); - let is_direct = true; // dbi handler uses direct mode + #[cfg(not(feature = "dwc_msi"))] + { + // Without dwc_msi feature, directly pass through MSI register access + mmio_perform_access(ecam_base, mmio); + } + } else { + // warn!("mmio_vpci_handler_dbi read {:#x}", mmio.address); + let offset = (mmio.address & 0xfff) as PciConfigAddress; + let zone = this_zone(); + let mut is_dev_belong_to_zone = false; + + let base = mmio.address as PciConfigAddress - offset + ecam_base as PciConfigAddress; + + let dev: Option = { + let mut guard = zone.write(); + let vbus = guard.vpci_bus_mut(); + if let Some(dev) = vbus.get_device_by_base(base) { + is_dev_belong_to_zone = true; + Some(dev) + } else { + drop(guard); + // Clone Arc first while holding GLOBAL_PCIE_LIST lock, then release it + // This avoids holding multiple locks simultaneously + let dev_clone = { + let global_pcie_list = GLOBAL_PCIE_LIST.lock(); + global_pcie_list + .values() + .find(|dev| { + let dev_guard = dev.read(); + dev_guard.get_base() == base + }) + .cloned() + }; + dev_clone + } + }; - handle_config_space_access(dev, mmio, offset, is_direct, is_root, is_dev_belong_to_zone)?; + let dev = match dev { + Some(dev) => dev, + None => { + handle_device_not_found(mmio, offset); + return Ok(()); + } + }; + + let is_root = is_this_root_zone(); + let is_direct = true; // dbi handler uses direct mode + + handle_config_space_access( + dev, + mmio, + offset, + is_direct, + is_root, + is_dev_belong_to_zone, + )?; + } + } else { + warn!("No extend config found for ecam_base {:#x}", _base); } Ok(()) } +#[cfg(all(feature = "dwc_pcie", feature = "pci_init_delay"))] +static DBI_PCI_INIT_DONE: Lazy>> = + Lazy::new(|| Mutex::new(BTreeMap::new())); + +#[cfg(all(feature = "dwc_pcie", feature = "pci_init_delay"))] +pub fn is_pci_init_done(domain_id: u8) -> bool { + DBI_PCI_INIT_DONE + .lock() + .get(&domain_id) + .copied() + .unwrap_or(false) +} + +#[cfg(all(feature = "dwc_pcie", feature = "pci_init_delay"))] +fn set_pci_init_done(domain_id: u8) { + DBI_PCI_INIT_DONE.lock().insert(domain_id, true); +} + pub fn mmio_vpci_direct_handler(mmio: &mut MMIOAccess, _base: usize) -> HvResult { let zone = this_zone(); let offset = (mmio.address & 0xfff) as PciConfigAddress; @@ -1020,3 +2245,223 @@ pub fn mmio_vpci_direct_handler(mmio: &mut MMIOAccess, _base: usize) -> HvResult Ok(()) } + +/// Handle MMIO access to MSIX table in BAR memory +pub fn mmio_msix_table_handler(mmio: &mut MMIOAccess, base: usize) -> HvResult { + let access_offset = mmio.address as u64; + let base_aligned = (base as u64) & !0xf; + + // Find the device matching this BAR's physical address and get domain_id from BDF + let (device_info, _domain_id) = { + let zone = this_zone(); + let guard = zone.read(); + let vbus = guard.vpci_bus(); + + // Find the device whose MSIX BAR paddr matches the handler base + let mut result = None; + let mut domain_id = 0xFF; + for dev in vbus.devs_ref().values() { + if let Some(msi_info) = dev.read().get_msi_info() { + if let Some(msix) = &msi_info.msix_info { + let msix_bar_aligned = msix.bar_paddr & !0xf; + if msix_bar_aligned == base_aligned { + // Get domain_id from device's BDF + domain_id = dev.read().get_bdf().domain(); + result = Some((dev.clone(), msix.offset, msix.entry_count)); + break; + } + } + } + } + + if result.is_none() { + let global_pcie_list = GLOBAL_PCIE_LIST.lock(); + for dev in global_pcie_list.values() { + if let Some(msi_info) = dev.read().get_msi_info() { + if let Some(msix) = &msi_info.msix_info { + let msix_bar_aligned = msix.bar_paddr & !0xf; + if msix_bar_aligned == base_aligned { + domain_id = dev.read().get_bdf().domain(); + result = Some((dev.clone(), msix.offset, msix.entry_count)); + break; + } + } + } + } + } + (result, domain_id) + }; + + // Check if this access is within the MSIX table range + if let Some((dev, msix_offset, entry_count)) = device_info { + let vbdf = dev.get_vbdf(); + + let msix_table_size = (entry_count as u64) * 16; // Each entry is 16 bytes + let msix_table_end = msix_offset + msix_table_size; + + if access_offset >= msix_offset && access_offset < msix_table_end { + // This is a MSIX table access, record it with detailed information + let offset_in_entry = access_offset - msix_offset; + let entry_index = offset_in_entry / 16; + let field_offset = offset_in_entry % 16; + let host_bdf = dev.get_bdf(); + let field_name = match field_offset { + 0..=3 => "msg_addr_lo", + 4..=7 => "msg_addr_hi", + 8..=11 => "msg_data", + 12..=15 => "vector_ctrl", + _ => "unknown", + }; + + if mmio.is_write { + let vm_value = mmio.value; + match field_offset { + 0..=3 => { + // Save VM's doorbell low 32 bits + dev.with_msi_info_mut(|msi_info| { + let current = msi_info.msi_doorbell & 0xffffffff00000000; + msi_info.set_doorbell(current | (mmio.value as u64)); + }); + + // Replace with hvisor's doorbell before writing to hardware + #[cfg(all(feature = "dwc_msi", feature = "dwc_pcie"))] + { + if _domain_id != 0xFF { + let hw_paddr = + crate::pci::dwc_msi::get_domain_doorbell_paddr(_domain_id); + let hw_doorbell_lo = (hw_paddr & 0xffffffff) as usize; + mmio.value = hw_doorbell_lo; + } + } + } + 4..=7 => { + // Save VM's doorbell high 32 bits + dev.with_msi_info_mut(|msi_info| { + let current = msi_info.msi_doorbell & 0xffffffff; + msi_info.set_doorbell(current | ((mmio.value as u64) << 32)); + }); + + // Replace with hvisor's doorbell before writing to hardware + #[cfg(all(feature = "dwc_msi", feature = "dwc_pcie"))] + { + if _domain_id != 0xFF { + let hw_paddr = + crate::pci::dwc_msi::get_domain_doorbell_paddr(_domain_id); + let hw_doorbell_hi = ((hw_paddr >> 32) & 0xffffffff) as usize; + mmio.value = hw_doorbell_hi; + } + } + } + 8..=11 => { + // Convert VM vector index to hardware vector index. + #[cfg(all(feature = "dwc_msi", feature = "dwc_pcie"))] + { + if _domain_id != 0xFF { + let zone = this_zone(); + let guard = zone.read(); + let vbus = guard.vpci_bus(); + if let Some(domain_msi_info) = + vbus.domain_msi_info().get(&_domain_id) + { + let virq_bit = mmio.value as u32; + let hwirq_bit = domain_msi_info.hwirq_bit; + let hw_value = virq_bit.wrapping_add(hwirq_bit); + mmio.value = hw_value as usize; + } + } + } + } + 12..=15 => {} + _ => {} + } + + mmio_perform_access(base, mmio); + let hw_value = mmio.value; + info!( + "MSIX access: rw W, vbdf {:#?}, host_bdf {:#?}, base {:#x}, entry {}, field {}, vm_value {:#x}, hw_value {:#x}", + vbdf, + host_bdf, + base, + entry_index, + field_name, + vm_value, + hw_value + ); + return Ok(()); + } else { + let mut hw_mmio = MMIOAccess { + address: mmio.address, + value: 0, + size: mmio.size, + is_write: false, + }; + mmio_perform_access(base, &mut hw_mmio); + let hw_value = hw_mmio.value; + + match field_offset { + 0..=3 => { + let dev_vm_doorbell = dev + .read() + .get_msi_info() + .map(|msi| msi.msi_doorbell) + .unwrap_or(0); + mmio.value = (dev_vm_doorbell & 0xffffffff) as usize; + } + 4..=7 => { + let dev_vm_doorbell = dev + .read() + .get_msi_info() + .map(|msi| msi.msi_doorbell) + .unwrap_or(0); + mmio.value = ((dev_vm_doorbell >> 32) & 0xffffffff) as usize; + } + 8..=11 => { + mmio.value = hw_value; + #[cfg(all(feature = "dwc_msi", feature = "dwc_pcie"))] + { + if _domain_id != 0xFF { + let zone = this_zone(); + let guard = zone.read(); + let vbus = guard.vpci_bus(); + if let Some(domain_msi_info) = + vbus.domain_msi_info().get(&_domain_id) + { + let hwirq_bit = domain_msi_info.hwirq_bit; + let hw_vec = hw_value as u32; + let virq_bit = if hw_vec >= hwirq_bit { + hw_vec - hwirq_bit + } else { + hw_vec + }; + mmio.value = virq_bit as usize; + } + } + } + } + 12..=15 => { + mmio.value = hw_value; + } + _ => { + mmio.value = hw_value; + } + } + + info!( + "MSIX access: rw R, vbdf {:#?}, host_bdf {:#?}, base {:#x}, entry {}, field {}, vm_value {:#x}, hw_value {:#x}", + vbdf, + host_bdf, + base, + entry_index, + field_name, + mmio.value, + hw_value + ); + return Ok(()); + } + } + } + + mmio_perform_access(base, mmio); + + Ok(()) +} diff --git a/src/pci/pci_struct.rs b/src/pci/pci_struct.rs index 87fdf70b..d26eed0a 100644 --- a/src/pci/pci_struct.rs +++ b/src/pci/pci_struct.rs @@ -309,14 +309,19 @@ impl VirtualPciAccessBits { let mut bits = BitArray::ZERO; bits[0x0..0x4].fill(true); // ID bits[0x08..0x0c].fill(true); // CLASS - bits[0x10..0x34].fill(true); //bar and rom + bits[0x10..0x34].fill(true); // BARs and ROM + bits[0x34..0x38].fill(true); // Capability Pointer + bits[0x40..0x100].fill(true); // Capability region (caps start at 0x40) Self { bits } } pub fn bridge() -> Self { - Self { - bits: BitArray::ZERO, - } + let mut bits = BitArray::ZERO; + bits[0x10..0x18].fill(true); // BARs + bits[0x38..0x3c].fill(true); // ROM + bits[0x34..0x38].fill(true); // Capability Pointer + bits[0x40..0x100].fill(true); // Capability region (caps start at 0x40) + Self { bits } } pub fn host_bridge() -> Self { @@ -336,6 +341,45 @@ impl VirtualPciAccessBits { } } +#[derive(Clone, Copy, Debug)] +pub struct MsixInfo { + pub bar_id: u8, + pub offset: u64, + pub entry_count: u32, // number of MSIX table entries + pub bar_paddr: u64, // physical address of the BAR +} + +#[derive(Clone, Debug)] +pub struct MsiInfo { + pub msi_count: u32, + // doorbell vm write to trigger interrupt + pub msi_doorbell: u64, + pub msix_info: Option, +} + +impl MsiInfo { + pub fn new(msi_count: u32) -> Self { + Self { + msi_count, + msi_doorbell: 0, + msix_info: None, + } + } + + pub fn set_doorbell(&mut self, doorbell: u64) { + self.msi_doorbell = doorbell; + } + + pub fn set_msix_info(&mut self, bar_id: u8, offset: u64, entry_count: u32, bar_paddr: u64) { + self.msix_info = Some(MsixInfo { + bar_id, + offset, + entry_count, + bar_paddr, + }); + } +} + /* VirtualPciConfigSpace * bdf: the bdf hvisor seeing(same with the bdf without hvisor) * vbdf: the bdf zone seeing, it can set just you like without sr-iov @@ -365,6 +409,9 @@ pub struct VirtualPciConfigSpace { capabilities: PciCapabilityList, dev_type: VpciDevType, + + // MSI/MSIX info for this device + msi_info: Option, } #[derive(Clone)] @@ -533,6 +580,22 @@ impl ArcRwLockVirtualPciConfigSpace { f(&guard.capabilities) } + pub fn with_msi_info(&self, f: F) -> Option + where + F: FnOnce(&MsiInfo) -> R, + { + let guard = self.0.read(); + guard.msi_info.as_ref().map(|msi_info| f(msi_info)) + } + + pub fn with_msi_info_mut(&self, f: F) -> Option + where + F: FnOnce(&mut MsiInfo) -> R, + { + let mut guard = self.0.write(); + guard.msi_info.as_mut().map(|msi_info| f(msi_info)) + } + pub fn read(&self) -> spin::RwLockReadGuard<'_, VirtualPciConfigSpaceWithZone> { self.0.read() } @@ -675,8 +738,14 @@ impl Debug for VirtualPciConfigSpace { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!( f, - "\n bdf {:#?}\n base {:#x}\n type {:#?}\n {:#?}\n {:#?}\n {:#?}", - self.bdf, self.base, self.config_type, self.bararr, self.rom, self.capabilities + "\n bdf {:#?}\n base {:#x}\n type {:#?}\n msi_info {:#x?}\n {:#?}\n {:#?}\n {:#?}", + self.bdf, + self.base, + self.config_type, + self.msi_info, + self.bararr, + self.rom, + self.capabilities ) } } @@ -707,6 +776,7 @@ impl VirtualPciConfigSpace { rom: PciMem::default(), capabilities: PciCapabilityList::new(), dev_type, + msi_info: None, } } @@ -737,6 +807,7 @@ impl VirtualPciConfigSpace { rom, capabilities: PciCapabilityList::new(), dev_type: VpciDevType::Physical, + msi_info: None, } } @@ -764,6 +835,7 @@ impl VirtualPciConfigSpace { rom, capabilities: PciCapabilityList::new(), dev_type: VpciDevType::Physical, + msi_info: None, } } @@ -789,6 +861,7 @@ impl VirtualPciConfigSpace { rom: PciMem::default(), capabilities: PciCapabilityList::new(), dev_type: VpciDevType::Physical, + msi_info: None, } } @@ -813,6 +886,7 @@ impl VirtualPciConfigSpace { rom: PciMem::default(), capabilities: PciCapabilityList::new(), dev_type: VpciDevType::Physical, + msi_info: None, } } @@ -848,6 +922,71 @@ impl VirtualPciConfigSpace { self.base } + pub fn get_msi_count(&self) -> u32 { + self.msi_info + .as_ref() + .map(|info| info.msi_count) + .unwrap_or(0) + } + + /// Build MSI/MSIX info structure based on device capabilities + pub fn build_msi_info(&mut self) { + let mut msi_count = 0u32; + let mut msix_count = 0u32; + let mut msix_bar_id = 0u8; + let mut msix_offset = 0u64; + let mut has_msix = false; + + // Check if the device has MSI or MSIX capability and calculate both + for (_offset, cap) in self.capabilities.iter() { + match cap.get_type() { + CapabilityType::Msi => { + // For MSI: read offset+2, Message Control bits 3:1 contain MMC + // Supported messages = 2^MMC + if let Ok(val) = cap.with_region(|region| region.read(0x02, 2)) { + let mmc = (val & 0x0E) >> 1; // bits 3:1 + msi_count = 1u32 << mmc; + } + } + CapabilityType::MsiX => { + // For MSIX: read offset+2, bits 10-0 contain table size + // Supported messages = table_size + 1 + if let Ok(val) = cap.with_region(|region| region.read(0x02, 2)) { + let table_size = (val & 0x07FF) as u32; // bits 10-0 + msix_count = table_size + 1; + } + + // Extract MSIX table location (offset+4) + // Bits 2-0: BAR ID (0-5), Bits 31-3: table offset + if let Ok(table_info) = cap.with_region(|region| region.read(0x04, 4)) { + msix_bar_id = (table_info & 0x07) as u8; + msix_offset = ((table_info >> 3) as u64) << 3; // multiply by 8 since offset is in 8-byte increments + has_msix = true; + } + } + _ => {} + } + } + + // Create MsiInfo if device has MSI or MSIX capability + let interrupt_count = core::cmp::max(msi_count, msix_count); + if interrupt_count > 0 { + let mut msi_info = MsiInfo::new(interrupt_count); + + if has_msix { + // Read the BAR's physical address + let bar_paddr = self.bararr[msix_bar_id as usize].get_value64() & !0xf; + msi_info.set_msix_info(msix_bar_id, msix_offset, msix_count, bar_paddr); + } + + self.msi_info = Some(msi_info); + } + } + + pub fn get_msi_info(&self) -> Option<&MsiInfo> { + self.msi_info.as_ref() + } + /* now the space_init just with bar * Note: space field removed, bar values are cached in config_value.bar_value */ @@ -1071,7 +1210,7 @@ impl PciIterator { } let mut ep = EndpointHeader::new_with_region(region); - let rom = Self::rom_init(&mut ep); + let rom = Self::rom_init(&mut self.allocator, &mut ep); let bararr = Self::bar_mem_init(ep.bar_limit().into(), &mut self.allocator, &mut ep); @@ -1090,6 +1229,8 @@ impl PciIterator { ); let _ = node.capability_enumerate(); + // Build MSI/MSIX info once during device discovery + node.build_msi_info(); Some(node) } @@ -1097,7 +1238,7 @@ impl PciIterator { // For bridge: don't push host_bridge, it will be handled in Iterator::next() warn!("bridge"); let mut bridge = PciBridgeHeader::new_with_region(region); - let rom = Self::rom_init(&mut bridge); + let rom = Self::rom_init(&mut self.allocator, &mut bridge); let bararr = Self::bar_mem_init(bridge.bar_limit().into(), &mut self.allocator, &mut bridge); @@ -1114,6 +1255,8 @@ impl PciIterator { ); let _ = node.capability_enumerate(); + // Build MSI/MSIX info once during device discovery + node.build_msi_info(); Some(node) } @@ -1130,11 +1273,28 @@ impl PciIterator { } } - fn rom_init(dev: &mut D) -> PciMem { + fn rom_init( + allocator: &mut Option, + dev: &mut D, + ) -> PciMem { let mut rom = dev.parse_rom(); if rom.get_type() == PciMemType::Rom { - rom.set_value(rom.get_value() as u64); - rom.set_virtual_value(rom.get_value() as u64); + if let Some(a) = allocator { + let value = a.alloc_memory32(rom.get_size() as u64).unwrap(); + rom.set_value(value); + rom.set_virtual_value(value); + // Do not enable ROM yet, write 0 (ROM disabled) + // VM will enable it later by writing address + enable bit + // info!( + // "allocated rom address: {:#x}, write 0 (disabled) to hardware", + // value + // ); + let _ = dev.write(dev.rom_offset(), 4, 0 as _); + } else { + let value = rom.get_value() as u64; + rom.set_value(value); + rom.set_virtual_value(value); + } } rom } @@ -1227,7 +1387,7 @@ impl PciIterator { fn next_device_not_ok(&mut self) -> bool { if let Some(parent) = self.stack.last_mut() { // only one child and skip this bus - if parent.has_secondary_link { + if parent.has_only_one_child { parent.device = MAX_DEVICE; } @@ -1370,7 +1530,7 @@ impl Iterator for PciIterator { let immediate_parent_bus = parent.bus; Some(self.get_bridge().next_bridge( self.address(immediate_parent_bus, bdf), - node.has_secondary_link(), + node.has_only_one_child(), self.is_mulitple_function, self.function, next_bus, @@ -1396,7 +1556,7 @@ pub struct Bridge { secondary_bus: u8, primary_bus: u8, mmio: PciConfigMmio, - has_secondary_link: bool, + has_only_one_child: bool, is_mulitple_function: bool, } @@ -1412,7 +1572,7 @@ impl Bridge { secondary_bus: 0, primary_bus: 0, mmio: PciConfigMmio::new(0, 0), // Dummy mmio for placeholder - has_secondary_link: false, + has_only_one_child: false, is_mulitple_function: false, } } @@ -1431,7 +1591,7 @@ impl Bridge { secondary_bus: bus_begin, primary_bus: bus_begin, mmio: PciConfigMmio::new(address, CONFIG_LENTH), - has_secondary_link: false, + has_only_one_child: false, is_mulitple_function, } } @@ -1439,7 +1599,7 @@ impl Bridge { pub fn next_bridge( &self, address: PciConfigAddress, - has_secondary_link: bool, + has_only_one_child: bool, is_mulitple_function: bool, function: u8, target_bus: u8, @@ -1453,7 +1613,7 @@ impl Bridge { secondary_bus: target_bus, primary_bus: self.bus, mmio, - has_secondary_link, + has_only_one_child, is_mulitple_function, } } @@ -1482,8 +1642,8 @@ impl Bridge { } } - pub fn set_has_secondary_link(&mut self, value: bool) { - self.has_secondary_link = value; + pub fn set_has_only_one_child(&mut self, value: bool) { + self.has_only_one_child = value; } } @@ -1525,10 +1685,54 @@ impl RootComplex { } } +#[derive(Debug)] +/// MSI information for a specific domain in a VM +/// Tracks the MSI interrupts needed for this domain and the hardware base interrupt bit +pub struct DomainMsiInfo { + /// Total number of MSI interrupts needed for all devices in this domain + pub msi_count: u32, + /// Hardware MSI base bit index (allocated from domain allocator) + pub hwirq_bit: u32, + /// Virtual doorbell address set by the VM (PCIE_MSI_ADDR_LO + PCIE_MSI_ADDR_HI) + pub vm_doorbell_addr: u64, +} + +impl DomainMsiInfo { + pub fn new(msi_count: u32, hwirq_bit: u32) -> Self { + Self { + msi_count, + hwirq_bit, + vm_doorbell_addr: 0, + } + } + + /// Set the virtual doorbell address (from VM) + pub fn set_vm_doorbell(&mut self, addr: u64) { + self.vm_doorbell_addr = addr; + } + + /// Get the virtual doorbell address + pub fn get_vm_doorbell(&self) -> u64 { + self.vm_doorbell_addr + } + + /// Get MSI mask based on msi_count + /// Returns a mask with msi_count bits set (0-based, e.g. msi_count=4 -> mask=0xf) + pub fn get_msi_mask(&self) -> u32 { + if self.msi_count >= 32 { + 0xffffffff + } else { + (1u32 << self.msi_count) - 1 + } + } +} + #[derive(Debug)] pub struct VirtualRootComplex { devs: BTreeMap, base_to_bdf: BTreeMap, + // MSI interrupt information per domain (domain_id -> DomainMsiInfo) + domain_msi_info: BTreeMap, accessor: Option>, } @@ -1537,6 +1741,7 @@ impl VirtualRootComplex { Self { devs: BTreeMap::new(), base_to_bdf: BTreeMap::new(), + domain_msi_info: BTreeMap::new(), accessor: None, } } @@ -1550,22 +1755,56 @@ impl VirtualRootComplex { bdf: Bdf, dev: VirtualPciConfigSpace, ) -> Option { - let parent_bus = dev.parent_bdf.bus(); - let offset = 0; - let base = if let Some(accessor) = &self.accessor { - match accessor.get_physical_address(bdf, offset, parent_bus) { - Ok(addr) => addr, - Err(_) => { - warn!("can not get physical address for device {:#?}(vbdf), reset device base same to hardware", bdf); - dev.get_base() - } + let base = dev.get_base(); + let host_bdf = dev.get_bdf(); + let vbdf = dev.get_vbdf(); + + #[cfg(feature = "dwc_pcie")] + let key = { + let bus = bdf.bus() as PciConfigAddress; + let device = bdf.device() as PciConfigAddress; + let function = bdf.function() as PciConfigAddress; + let pci_addr = (bus << 24) + (device << 19) + (function << 16); + if bus != 0 { + pci_addr + } else { + base } - } else { - warn!("can not found accessor for vpci bus, reset device base same to hardware"); - dev.get_base() }; - info!("pci insert base {:#x} to bdf {:#?}", base, bdf); - self.base_to_bdf.insert(base, bdf); + + #[cfg(not(feature = "dwc_pcie"))] + let key = base; + + #[cfg(feature = "dwc_pcie")] + { + let bus = bdf.bus() as PciConfigAddress; + let device = bdf.device() as PciConfigAddress; + let function = bdf.function() as PciConfigAddress; + let pci_addr = (bus << 24) + (device << 19) + (function << 16); + info!( + "vpci insert: base_to_bdf[{:#x}] = key_bdf {:#?}, source {}, base {:#x}, pci_addr {:#x}, dev_host_bdf {:#?}, dev_vbdf {:#?}, remapped {}", + key, + bdf, + if key == pci_addr { "pci_addr" } else { "base" }, + base, + pci_addr, + host_bdf, + vbdf, + host_bdf != vbdf + ); + } + + #[cfg(not(feature = "dwc_pcie"))] + info!( + "vpci insert: base_to_bdf[{:#x}] = key_bdf {:#?}, source base, base {:#x}, dev_host_bdf {:#?}, dev_vbdf {:#?}, remapped {}", + key, + bdf, + base, + host_bdf, + vbdf, + host_bdf != vbdf + ); + self.base_to_bdf.insert(key, bdf); self.devs .insert(bdf, ArcRwLockVirtualPciConfigSpace::new(dev)) } @@ -1574,6 +1813,10 @@ impl VirtualRootComplex { &mut self.devs } + pub fn devs_ref(&self) -> &BTreeMap { + &self.devs + } + pub fn get(&self, bdf: &Bdf) -> Option<&ArcRwLockVirtualPciConfigSpace> { self.devs.get(bdf) } @@ -1590,6 +1833,34 @@ impl VirtualRootComplex { let bdf = self.base_to_bdf.get(&base).copied()?; self.devs.get(&bdf).cloned() } + + /// Add MSI count for a specific domain with allocated hardware interrupt bit + pub fn add_msi_count_for_domain(&mut self, domain: u8, msi_count: u32, hwirq_bit: u32) { + let vm_doorbell = self + .domain_msi_info + .get(&domain) + .map(|info| info.get_vm_doorbell()) + .unwrap_or(0); + + let mut info = DomainMsiInfo::new(msi_count, hwirq_bit); + info.set_vm_doorbell(vm_doorbell); + self.domain_msi_info.insert(domain, info); + } + + /// Get MSI info for a specific domain + pub fn get_domain_msi_info(&self, domain: u8) -> Option<&DomainMsiInfo> { + self.domain_msi_info.get(&domain) + } + + /// Get reference to domain MSI info map + pub fn domain_msi_info(&self) -> &BTreeMap { + &self.domain_msi_info + } + + /// Get mutable reference to domain MSI info map + pub fn domain_msi_info_mut(&mut self) -> &mut BTreeMap { + &mut self.domain_msi_info + } } #[derive(Debug)] @@ -1963,34 +2234,35 @@ impl VirtualPciConfigSpace { self.capabilities = capabilities; } - //TODO: check secondary link by read cap - pub fn has_secondary_link(&self) -> bool { + // detect whether this bridge secondary bus can have only one child device. + pub fn has_only_one_child(&self) -> bool { match self.config_type { HeaderType::PciBridge => { - // Find PciExpress capability - // warn!("has_secondary_link {:#?}", self.capabilities); - // for (_, capability) in &self.capabilities { - // if capability.cap_type == CapabilityType::PciExpress { - // // Read PCIe Capability Register at offset + 0x00 - // // Bits 4:0 contain the Device/Port Type - // let offset = capability.get_offset(); - // if let Ok(cap_reg) = self.backend.read(offset, 2) { - // let type_val = (cap_reg as u16).get_bits(0..5); - // if type_val == PCI_EXP_TYPE_ROOT_PORT || type_val == PCI_EXP_TYPE_PCIE_BRIDGE { - // return true; - // } else if type_val == PCI_EXP_TYPE_UPSTREAM || type_val == PCI_EXP_TYPE_DOWNSTREAM { - // // Parent check is not implemented, set to false for now - // return false; - // } - // } - // break; - // } - // } - // false - // #[cfg(feature = "dwc_pcie")] - // return true; - // #[cfg(not(feature = "dwc_pcie"))] - return false; + // Parse PCIe Device/Port Type from PCI Express Capability Register + // (capability offset + 0x02, bits 7:4). + for capability in self._capability_enumerate(self.backend.clone()) { + if capability.get_type() != CapabilityType::PciExpress { + continue; + } + + let offset = capability.get_offset(); + if let Ok(cap_reg) = self.backend.read(offset + 0x2, 2) { + let port_type = (cap_reg as u16).get_bits(4..8) as u16; + return match port_type { + // Root Port / Downstream Port: secondary bus has a single downstream link. + PCI_EXP_TYPE_ROOT_PORT | PCI_EXP_TYPE_DOWNSTREAM => true, + // Upstream Port / PCIe-to-PCI bridge can have multiple children behind it. + PCI_EXP_TYPE_UPSTREAM | PCI_EXP_TYPE_PCIE_BRIDGE => false, + _ => false, + }; + } + + // Capability exists but cannot be read safely. + return false; + } + + // Non-PCIe bridge (or no PCIe capability): keep full secondary-bus scan. + false } _ => false, } diff --git a/src/zone.rs b/src/zone.rs index 158d8c67..d33ab5f9 100644 --- a/src/zone.rs +++ b/src/zone.rs @@ -36,6 +36,13 @@ use crate::memory::{MMIOConfig, MMIOHandler, MMIORegion, MemorySet}; use core::panic; use core::sync::atomic::{AtomicBool, Ordering}; +#[cfg(all(feature = "pci_init_delay", feature = "dwc_pcie"))] +use crate::config::{HvPciConfig, HvPciDevConfig, CONFIG_MAX_PCI_DEV, CONFIG_PCI_BUS_MAXNUM}; +#[cfg(all(feature = "pci_init_delay", feature = "dwc_pcie"))] +use crate::pci::pci_config::GLOBAL_PCIE_LIST; +#[cfg(all(feature = "pci_init_delay", feature = "dwc_pcie"))] +use crate::pci::pci_struct::Bdf; + #[cfg(feature = "dwc_pcie")] #[derive(Debug)] pub struct VirtualAtuConfigs { @@ -66,13 +73,6 @@ impl VirtualAtuConfigs { self.ecam_to_atu.insert(ecam_base, atu) } - pub fn get_or_insert_atu(&mut self, ecam_base: usize, f: F) -> &mut AtuConfig - where - F: FnOnce() -> AtuConfig, - { - self.ecam_to_atu.entry(ecam_base).or_insert_with(f) - } - pub fn get_atu_by_io_base(&self, io_base: PciConfigAddress) -> Option<&AtuConfig> { let ecam = self.io_base_to_ecam.get(&io_base); if let Some(ecam) = ecam { @@ -317,6 +317,459 @@ impl ZoneInner { pub fn atu_configs_mut(&mut self) -> &mut VirtualAtuConfigs { &mut self.atu_configs } + + #[cfg(all(feature = "pci_init_delay", feature = "dwc_pcie"))] + pub fn guest_pci_init_delay( + &mut self, + _zone_id: usize, + alloc_pci_devs: &[HvPciDevConfig; CONFIG_MAX_PCI_DEV], + num_pci_devs: u64, + pci_config: &[HvPciConfig], + _num_pci_config: usize, + ) -> HvResult { + let mut guard = GLOBAL_PCIE_LIST.lock(); + for target_pci_config in pci_config { + // Skip empty config + if target_pci_config.ecam_base == 0 { + continue; + } + + #[allow(unused_variables)] + let ecam_base = target_pci_config.ecam_base; + let target_domain = target_pci_config.domain; + let bus_range_begin = target_pci_config.bus_range_begin as u8; + + // Create accessor for VirtualRootComplex, similar to RootComplex + #[cfg(feature = "dwc_pcie")] + { + use crate::pci::config_accessors::dwc::DwcConfigAccessor; + use crate::platform; + use alloc::sync::Arc; + + let atu_config = platform::ROOT_DWC_ATU_CONFIG + .iter() + .find(|atu_cfg| atu_cfg.ecam_base == ecam_base); + + match atu_config { + Some(cfg) => { + let root_bus = bus_range_begin; + let accessor = Arc::new(DwcConfigAccessor::new(cfg, root_bus)); + self.vpci_bus_mut().set_accessor(accessor); + } + None => { + warn!("No ATU config found for ecam_base 0x{:x}", ecam_base); + continue; + } + } + } + + #[cfg(feature = "loongarch64_pcie")] + { + use crate::pci::config_accessors::loongarch64::LoongArchConfigAccessor; + use alloc::sync::Arc; + + let root_bus = bus_range_begin; + let accessor = Arc::new(LoongArchConfigAccessor::new( + ecam_base, + target_pci_config.ecam_size, + root_bus, + )); + self.vpci_bus_mut().set_accessor(accessor); + } + + #[cfg(feature = "ecam_pcie")] + { + use crate::pci::config_accessors::ecam::EcamConfigAccessor; + use alloc::sync::Arc; + + let accessor = Arc::new(EcamConfigAccessor::new(ecam_base)); + self.vpci_bus_mut().set_accessor(accessor); + } + + let mut filtered_devices: alloc::vec::Vec = alloc::vec::Vec::new(); + for i in 0..num_pci_devs { + let dev_config = alloc_pci_devs[i as usize]; + if dev_config.domain == target_domain { + filtered_devices.push(dev_config); + } + } + + // Skip if no devices for this domain + if filtered_devices.is_empty() { + continue; + } + + filtered_devices.sort_by(|a, b| { + a.bus + .cmp(&b.bus) + .then_with(|| a.device.cmp(&b.device)) + .then_with(|| a.function.cmp(&b.function)) + }); + + let mut vbus_pre = bus_range_begin; + let mut bus_pre = bus_range_begin; + let mut device_pre = 0u8; + let mut domain_msi_count: u32 = 0; + let mut vdevice_pre = 0u8; + + /* + * To allow Linux to successfully recognize the devices we add, hvisor needs + * to adjust the devices’ BDFs. Linux always assumes that the PCIe buses + * it discovers are contiguous, and that device function numbers always start from 0. + * + * 1. The bus number of a virtual BDF (vBDF) must start from range_begin and + * be contiguous. Once the physical bus number increases—regardless of + * how much it increases—the corresponding virtual bus number (vbus) + * can only increase by 1. + * + * 2. If the function number of a vBDF is not 0, and it is found that + * the device with function 0 of the same vBDF does not belong to the current zone, + * then the function number of the current vBDF should be set to 0. + */ + for dev_config in &filtered_devices { + let bdf = Bdf::new_from_config(*dev_config); + let bus = bdf.bus(); + let device = bdf.device(); + let function = bdf.function(); + + /* + * vfunction = if (bus != bus_pre || device != device_pre) && function != 0 + * In practice, remapping is performed only for new devices whose function is not 0; + * however, the check for function != 0 does not affect the final result. + */ + let vfunction = if bus != bus_pre || device != device_pre { + 0 + } else { + function + }; + + let vbus = if bus > bus_pre { + vbus_pre += 1; + vbus_pre + } else { + vbus_pre + }; + + // Remap device number to be contiguous, starting from 0 + let vdevice = if bus != bus_pre || device != device_pre { + // New bus or new device, increment device counter + if bus != bus_pre { + vdevice_pre = 0; + } else { + vdevice_pre += 1; + } + vdevice_pre + } else { + // Same bus and device, keep the same virtual device number + vdevice_pre + }; + + let vbdf = Bdf::new(bdf.domain(), vbus, vdevice, vfunction); + + device_pre = device; + bus_pre = bus; + + // TODO: adjust vbdf will cause line interrupt injecet error, so remove it temporarily + #[cfg(not(feature = "dwc_msi"))] + let vbdf = bdf; + + info!("set bdf {:#?} to vbdf {:#?}", bdf, vbdf); + + #[cfg(any( + all(feature = "iommu", target_arch = "aarch64"), + all(feature = "iommu", target_arch = "riscv64"), + target_arch = "x86_64" + ))] + { + let iommu_pt_addr = if self.iommu_pt().is_some() { + self.iommu_pt().unwrap().root_paddr() + } else { + 0 + }; + let device_id = (dev_config.bus as usize) << 8 + | (dev_config.device as usize) << 3 + | dev_config.function as usize; + #[cfg(feature = "share_s2pt")] + crate::device::iommu::iommu_add_device_with_root_pt_addr( + _zone_id, + device_id as _, + self.gpm().root_paddr(), + ); + #[cfg(not(feature = "share_s2pt"))] + crate::device::iommu::iommu_add_device_with_root_pt_addr( + _zone_id, + device_id as _, + iommu_pt_addr, + ); + } + + // Insert device into vpci_bus with calculated vbdf + if let Some(dev) = guard.get(&bdf) { + if bdf.is_host_bridge(dev.read().get_host_bdf().bus()) + || dev.with_config_value(|config_value| -> bool { + config_value.get_class().0 == 0x6 + }) + { + let mut vdev = dev.read().config_space.clone(); + vdev.set_vbdf(vbdf); + let msi_count = vdev.get_msi_count(); + domain_msi_count += msi_count; + self.vpci_bus_mut().insert(vbdf, vdev); + } else { + // Check if device is already allocated to another zone + if dev.get_zone_id().is_none() { + dev.set_zone_id(Some(_zone_id as u32)); + let mut vdev_inner = dev.read().config_space.clone(); + vdev_inner.set_vbdf(vbdf); + let msi_count = vdev_inner.get_msi_count(); + domain_msi_count += msi_count; + self.vpci_bus_mut().insert(vbdf, vdev_inner); + } else { + warn!( + "Device {:#?} is already allocated to zone {:?}", + bdf, + dev.get_zone_id() + ); + } + } + } else { + warn!("can not find dev {:#?} in GLOBAL_PCIE_LIST (not detected during enumeration)", bdf); + #[cfg(feature = "ecam_pcie")] + { + use crate::pci::pci_struct::VirtualPciConfigSpace; + use crate::pci::vpci_dev::{get_handler, VpciDevType}; + + let dev_type = dev_config.dev_type; + match dev_type { + VpciDevType::Physical => { + warn!("can not find dev {:#?}", bdf); + } + _ => { + if let Some(_handler) = get_handler(dev_type) { + let base = ecam_base + + ((bdf.bus() as u64) << 20) + + ((bdf.device() as u64) << 15) + + ((bdf.function() as u64) << 12); + let dev = VirtualPciConfigSpace::virt_dev(bdf, base, dev_type); + self.vpci_bus_mut().insert(vbdf, dev); + } else { + warn!("can not find dev {:#?}, unknown device type", bdf); + } + } + } + } + } + } + + // After processing all devices for this domain, allocate hardware MSI bits + if domain_msi_count > 0 { + #[cfg(all(feature = "dwc_msi", feature = "dwc_pcie"))] + { + // Get the DW MSI domain allocator and allocate hwbit + if let Some(mut domain_lock) = + crate::pci::dwc_msi::get_dwc_msi_domain_mut(target_domain) + { + if let Some(domain_msi) = domain_lock.get_mut(&target_domain) { + let zone_cpu_set = self.cpu_set(); + let target_cpu = zone_cpu_set.first_cpu().unwrap_or(0); + match domain_msi.allocate_for_cpu(target_cpu, domain_msi_count) { + Ok(hwirq_bit) => { + // Register the MSI info for this domain + self.vpci_bus_mut().add_msi_count_for_domain( + target_domain, + domain_msi_count, + hwirq_bit, + ); + } + Err(e) => { + warn!( + "Failed to allocate MSI for domain {}: {:?}", + target_domain, e + ); + } + } + } + } + } + + #[cfg(not(feature = "dwc_msi"))] + { + // Without dwc_msi feature, just register without hardware bit allocation + self.vpci_bus_mut().add_msi_count_for_domain( + target_domain, + domain_msi_count, + 0, // hwirq_bit is 0 when not using dwc_msi + ); + } + } + } + info!("vpci bus init done\n {:#x?}", self.vpci_bus()); + Ok(()) + } + + #[cfg(all(feature = "pci_init_delay", feature = "dwc_pcie"))] + pub fn virtual_pci_dbi_pref_init( + &mut self, + pci_rootcomplex_config: &[HvPciConfig; CONFIG_PCI_BUS_MAXNUM], + _num_pci_config: usize, + ) { + use crate::pci::pci_handler::mmio_vpci_handler_dbi; + + for rootcomplex_config in pci_rootcomplex_config { + if rootcomplex_config.ecam_base == 0 { + continue; + } + + let encoded_arg = + rootcomplex_config.ecam_base as usize + (rootcomplex_config.domain as usize); + self.mmio_region_register( + rootcomplex_config.ecam_base as usize, + rootcomplex_config.ecam_size as usize, + mmio_vpci_handler_dbi, + encoded_arg, + ); + } + } + + #[cfg(all(feature = "pci_init_delay", feature = "dwc_pcie"))] + pub fn virtual_pci_mmio_init_delay( + &mut self, + pci_rootcomplex_config: &[HvPciConfig; CONFIG_PCI_BUS_MAXNUM], + _num_pci_config: usize, + ) { + #[cfg(feature = "loongarch64_pcie")] + let mut emergency_map_regions: alloc::vec::Vec<(usize, usize)> = alloc::vec::Vec::new(); + + for rootcomplex_config in pci_rootcomplex_config { + if rootcomplex_config.ecam_base == 0 { + continue; + } + #[cfg(feature = "ecam_pcie")] + { + use crate::pci::pci_handler::mmio_vpci_handler; + + self.mmio_region_register( + rootcomplex_config.ecam_base as usize, + rootcomplex_config.ecam_size as usize, + mmio_vpci_handler, + rootcomplex_config.ecam_base as usize, + ); + } + #[cfg(feature = "dwc_pcie")] + { + use crate::memory::mmio_generic_handler; + use crate::pci::config_accessors::dwc_atu::AtuConfig; + use crate::pci::config_accessors::{dwc::DwcConfigRegionBackend, PciRegionMmio}; + use crate::pci::pci_handler::{ + mmio_dwc_cfg_handler, mmio_dwc_io_handler, mmio_vpci_handler_dbi, + }; + use crate::platform; + + let encoded_arg = + rootcomplex_config.ecam_base as usize + (rootcomplex_config.domain as usize); + self.mmio_region_register( + rootcomplex_config.ecam_base as usize, + rootcomplex_config.ecam_size as usize, + mmio_vpci_handler_dbi, + encoded_arg, + ); + + let extend_config = platform::ROOT_DWC_ATU_CONFIG + .iter() + .find(|extend_cfg| extend_cfg.ecam_base == rootcomplex_config.ecam_base); + + if let Some(extend_config) = extend_config { + if extend_config.apb_base != 0 && extend_config.apb_size != 0 { + self.mmio_region_register( + extend_config.apb_base as usize, + extend_config.apb_size as usize, + mmio_generic_handler, + extend_config.apb_base as usize, + ); + } + + let cfg_size_half = extend_config.cfg_size / 2; + let cfg0_base = extend_config.cfg_base; + if cfg0_base != 0 && cfg_size_half != 0 { + self.mmio_region_register( + cfg0_base as usize, + cfg_size_half as usize, + mmio_dwc_cfg_handler, + cfg0_base as usize, + ); + } + + let cfg1_base = extend_config.cfg_base + cfg_size_half; + if cfg1_base != 0 && cfg_size_half != 0 { + self.mmio_region_register( + cfg1_base as usize, + cfg_size_half as usize, + mmio_dwc_cfg_handler, + cfg1_base as usize, + ); + } + + if extend_config.io_cfg_atu_shared != 0 { + self.mmio_region_register( + rootcomplex_config.io_base as usize, + rootcomplex_config.io_size as usize, + mmio_dwc_io_handler, + rootcomplex_config.io_base as usize, + ); + } + + let mut atu = AtuConfig::default(); + + let dbi_base = extend_config.dbi_base as crate::pci::PciConfigAddress; + let dbi_size = extend_config.dbi_size; + let dbi_region = PciRegionMmio::new(dbi_base, dbi_size); + let dbi_backend = DwcConfigRegionBackend::new(dbi_region); + if let Err(e) = atu.init_limit_hw_value(&dbi_backend) { + warn!("Failed to initialize ATU0 limit defaults: {:?}", e); + } + + self.atu_configs_mut() + .insert_atu(rootcomplex_config.ecam_base as usize, atu); + self.atu_configs_mut().insert_cfg_base_mapping( + extend_config.cfg_base as crate::pci::PciConfigAddress, + rootcomplex_config.ecam_base as usize, + ); + self.atu_configs_mut().insert_io_base_mapping( + rootcomplex_config.io_base as crate::pci::PciConfigAddress, + rootcomplex_config.ecam_base as usize, + ); + } + } + #[cfg(feature = "loongarch64_pcie")] + { + use crate::pci::pci_handler::mmio_vpci_direct_handler; + + self.mmio_region_register( + rootcomplex_config.ecam_base as usize, + rootcomplex_config.ecam_size as usize, + mmio_vpci_direct_handler, + rootcomplex_config.ecam_base as usize, + ); + emergency_map_regions.push(( + rootcomplex_config.ecam_base as usize, + rootcomplex_config.ecam_size as usize, + )); + } + #[cfg(not(any( + feature = "ecam_pcie", + feature = "dwc_pcie", + feature = "loongarch64_pcie" + )))] + { + warn!( + "No extend config found for base 0x{:x}", + rootcomplex_config.ecam_base + ); + } + } + + // Note: emergency_map_regions requires access to self (for Zone), so this must be handled at Zone level + } } static ZONE_LIST: RwLock>> = RwLock::new(vec![]); @@ -388,29 +841,6 @@ pub fn zone_create(config: &HvZoneConfig) -> HvResult> { zone.pt_init(config.memory_regions())?; zone.mmio_init(&config.arch_config); - #[cfg(feature = "pci")] - { - let _ = zone.virtual_pci_mmio_init(&config.pci_config, config.num_pci_bus as usize); - let _ = zone.guest_pci_init( - zone_id, - &config.alloc_pci_devs, - config.num_pci_devs, - &config.pci_config, - config.num_pci_bus as usize, - ); - } - - // #[cfg(target_arch = "aarch64")] - // zone.ivc_init(config.ivc_config()); - - /* loongarch page table emergency */ - /* Kai: Maybe unnecessary but i can't boot vms on my 3A6000 PC without this function. */ - // #[cfg(target_arch = "loongarch64")] - // zone.page_table_emergency( - // config.pci_config[0].ecam_base as _, - // config.pci_config[0].ecam_size as _, - // )?; - let mut cpu_num = 0; for cpu_id in config.cpus().iter() { if let Some(existing_zone) = get_cpu_data(*cpu_id as _).zone.clone() { @@ -427,6 +857,54 @@ pub fn zone_create(config: &HvZoneConfig) -> HvResult> { cpu_num += 1; } zone.write().set_cpu_num(cpu_num); + + #[cfg(feature = "pci")] + { + #[cfg(feature = "pci_init_delay")] + { + #[cfg(feature = "dwc_pcie")] + { + let num_pci_bus = config.num_pci_bus as usize; + if zone_id == 0 { + let mut inner = zone.write(); + inner.virtual_pci_dbi_pref_init(&config.pci_config, num_pci_bus); + } else { + let _ = zone.virtual_pci_mmio_init(&config.pci_config, num_pci_bus); + let _ = zone.guest_pci_init( + zone_id, + &config.alloc_pci_devs, + config.num_pci_devs, + &config.pci_config, + num_pci_bus, + ); + } + } + } + + #[cfg(all(feature = "pci", not(feature = "pci_init_delay")))] + { + let _ = zone.virtual_pci_mmio_init(&config.pci_config, config.num_pci_bus as usize); + let _ = zone.guest_pci_init( + zone_id, + &config.alloc_pci_devs, + config.num_pci_devs, + &config.pci_config, + config.num_pci_bus as usize, + ); + } + } + + // #[cfg(target_arch = "aarch64")] + // zone.ivc_init(config.ivc_config()); + + /* loongarch page table emergency */ + /* Kai: Maybe unnecessary but i can't boot vms on my 3A6000 PC without this function. */ + // #[cfg(target_arch = "loongarch64")] + // zone.page_table_emergency( + // config.pci_config[0].ecam_base as _, + // config.pci_config[0].ecam_size as _, + // )?; + let cpu_set = zone.read().cpu_set(); info!("zone cpu_set: {:#b}", cpu_set.bitmap);