From 25ff330b05fb88f49aa002963a6da9b02efa03b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=99=AF=E5=AE=87?= <2537738252@qq.com> Date: Tue, 12 May 2026 21:30:33 +0800 Subject: [PATCH 1/2] feat: virtual riscv-iommu support --- Cargo.toml | 3 +- platform/riscv64/qemu-aia/cargo/features | 1 + platform/riscv64/qemu-aia/image/dts/virt.dts | 11 + platform/riscv64/qemu-aia/image/dts/zone0.dts | 29 +- .../qemu-aia/image/dts/zone1-linux.dts | 11 + platform/riscv64/qemu-aia/platform.mk | 5 +- src/device/iommu/arm_smmu/mod.rs | 2 +- src/device/iommu/dummy_iommu.rs | 2 +- src/device/iommu/intel_vtd/mod.rs | 2 +- src/device/iommu/iommu_trait.rs | 4 +- src/device/iommu/mod.rs | 17 + src/device/iommu/riscv_iommu/cmd.rs | 12 +- src/device/iommu/riscv_iommu/iommu_hw.rs | 101 ++- src/device/iommu/riscv_iommu/mod.rs | 21 +- src/device/iommu/riscv_iommu/reg_bits.rs | 22 +- src/device/iommu/riscv_iommu/viommu.rs | 809 ++++++++++++++++++ src/device/irqchip/aia/mod.rs | 2 +- src/device/irqchip/aia/vimsic.rs | 4 +- src/memory/mm.rs | 14 + src/zone.rs | 8 + 20 files changed, 1019 insertions(+), 61 deletions(-) create mode 100644 src/device/iommu/riscv_iommu/viommu.rs diff --git a/Cargo.toml b/Cargo.toml index 89b4cd1f..361706d1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -51,13 +51,14 @@ uefi-raw = "=0.9.0" ############# general ############## iommu = [] # supported by: aarch64, riscv64 pci = [] # supported by: aarch64, riscv64,loongarch64 -share_s2pt = ["iommu"] print_timestamp = [] # print timestamp when logging ############# IOMMU ############## riscv_iommu = ["iommu"] arm_smmu = ["iommu"] intel_vtd = ["iommu"] +share_s2pt = ["iommu"] +viommu = ["iommu"] # supported by: riscv64 ############# PCIe access mechanism ############## ecam_pcie = [] # Standard ECAM mechanism (default for most platforms) diff --git a/platform/riscv64/qemu-aia/cargo/features b/platform/riscv64/qemu-aia/cargo/features index 2600021b..b0b27b6d 100644 --- a/platform/riscv64/qemu-aia/cargo/features +++ b/platform/riscv64/qemu-aia/cargo/features @@ -4,3 +4,4 @@ pci ecam_pcie riscv_iommu share_s2pt +viommu diff --git a/platform/riscv64/qemu-aia/image/dts/virt.dts b/platform/riscv64/qemu-aia/image/dts/virt.dts index 5e5f7ff3..db8eace6 100644 --- a/platform/riscv64/qemu-aia/image/dts/virt.dts +++ b/platform/riscv64/qemu-aia/image/dts/virt.dts @@ -202,6 +202,16 @@ compatible = "sifive,test1\0sifive,test0\0syscon"; }; + iommu@3010000 { + msi-parent = <0x0a>; + interrupts = <0x24 0x01 0x25 0x01 0x26 0x01 0x27 0x01>; + interrupt-parent = <0x0c>; + reg = <0x00 0x3010000 0x00 0x1000>; + phandle = <0x8000>; + #iommu-cells = <0x01>; + compatible = "riscv,iommu"; + }; + virtio_mmio@10008000 { interrupts = <0x08 0x04>; interrupt-parent = <0x0c>; @@ -315,6 +325,7 @@ pci@30000000 { interrupt-map-mask = <0x1800 0x00 0x00 0x07>; interrupt-map = <0x00 0x00 0x00 0x01 0x0c 0x20 0x04 0x00 0x00 0x00 0x02 0x0c 0x21 0x04 0x00 0x00 0x00 0x03 0x0c 0x22 0x04 0x00 0x00 0x00 0x04 0x0c 0x23 0x04 0x800 0x00 0x00 0x01 0x0c 0x21 0x04 0x800 0x00 0x00 0x02 0x0c 0x22 0x04 0x800 0x00 0x00 0x03 0x0c 0x23 0x04 0x800 0x00 0x00 0x04 0x0c 0x20 0x04 0x1000 0x00 0x00 0x01 0x0c 0x22 0x04 0x1000 0x00 0x00 0x02 0x0c 0x23 0x04 0x1000 0x00 0x00 0x03 0x0c 0x20 0x04 0x1000 0x00 0x00 0x04 0x0c 0x21 0x04 0x1800 0x00 0x00 0x01 0x0c 0x23 0x04 0x1800 0x00 0x00 0x02 0x0c 0x20 0x04 0x1800 0x00 0x00 0x03 0x0c 0x21 0x04 0x1800 0x00 0x00 0x04 0x0c 0x22 0x04>; + iommu-map = <0x00 0x8000 0x00 0x00 0x00 0x8000 0x00 0xffff>; ranges = <0x1000000 0x00 0x00 0x00 0x3000000 0x00 0x10000 0x2000000 0x00 0x40000000 0x00 0x40000000 0x00 0x40000000 0x3000000 0x04 0x00 0x04 0x00 0x04 0x00>; reg = <0x00 0x30000000 0x00 0x10000000>; msi-parent = <0x0a>; diff --git a/platform/riscv64/qemu-aia/image/dts/zone0.dts b/platform/riscv64/qemu-aia/image/dts/zone0.dts index 3b0589b6..6adcf55a 100644 --- a/platform/riscv64/qemu-aia/image/dts/zone0.dts +++ b/platform/riscv64/qemu-aia/image/dts/zone0.dts @@ -103,24 +103,6 @@ phandle = <0x04>; }; }; - - cpu-map { - - cluster0 { - - core0 { - cpu = <0x07>; - }; - - core1 { - cpu = <0x05>; - }; - - core2 { - cpu = <0x03>; - }; - }; - }; }; soc { @@ -159,9 +141,20 @@ compatible = "qemu,imsics\0riscv,imsics"; }; + iommu@3010000 { + msi-parent = <0x0a>; + interrupts = <0x24 0x01 0x25 0x01 0x26 0x01 0x27 0x01>; + interrupt-parent = <0x0c>; + reg = <0x00 0x3010000 0x00 0x1000>; + phandle = <0x8000>; + #iommu-cells = <0x01>; + compatible = "riscv,iommu"; + }; + pci@30000000 { interrupt-map-mask = <0x1800 0x00 0x00 0x07>; interrupt-map = <0x00 0x00 0x00 0x01 0x0c 0x20 0x04 0x00 0x00 0x00 0x02 0x0c 0x21 0x04 0x00 0x00 0x00 0x03 0x0c 0x22 0x04 0x00 0x00 0x00 0x04 0x0c 0x23 0x04 0x800 0x00 0x00 0x01 0x0c 0x21 0x04 0x800 0x00 0x00 0x02 0x0c 0x22 0x04 0x800 0x00 0x00 0x03 0x0c 0x23 0x04 0x800 0x00 0x00 0x04 0x0c 0x20 0x04 0x1000 0x00 0x00 0x01 0x0c 0x22 0x04 0x1000 0x00 0x00 0x02 0x0c 0x23 0x04 0x1000 0x00 0x00 0x03 0x0c 0x20 0x04 0x1000 0x00 0x00 0x04 0x0c 0x21 0x04 0x1800 0x00 0x00 0x01 0x0c 0x23 0x04 0x1800 0x00 0x00 0x02 0x0c 0x20 0x04 0x1800 0x00 0x00 0x03 0x0c 0x21 0x04 0x1800 0x00 0x00 0x04 0x0c 0x22 0x04>; + iommu-map = <0x00 0x8000 0x00 0x00 0x00 0x8000 0x00 0xffff>; ranges = <0x1000000 0x00 0x00 0x00 0x3000000 0x00 0x10000 0x2000000 0x00 0x40000000 0x00 0x40000000 0x00 0x40000000 0x3000000 0x04 0x00 0x04 0x00 0x04 0x00>; reg = <0x00 0x30000000 0x00 0x10000000>; msi-parent = <0x0a>; diff --git a/platform/riscv64/qemu-aia/image/dts/zone1-linux.dts b/platform/riscv64/qemu-aia/image/dts/zone1-linux.dts index 8d510167..17b830ef 100644 --- a/platform/riscv64/qemu-aia/image/dts/zone1-linux.dts +++ b/platform/riscv64/qemu-aia/image/dts/zone1-linux.dts @@ -72,9 +72,20 @@ compatible = "virtio,mmio"; }; + iommu@3010000 { + msi-parent = <0x0a>; + interrupts = <0x24 0x01 0x25 0x01 0x26 0x01 0x27 0x01>; + interrupt-parent = <0x0c>; + reg = <0x00 0x3010000 0x00 0x1000>; + phandle = <0x8000>; + #iommu-cells = <0x01>; + compatible = "riscv,iommu"; + }; + pci@30000000 { interrupt-map-mask = <0x1800 0x00 0x00 0x07>; interrupt-map = <0x00 0x00 0x00 0x01 0x0c 0x20 0x04 0x00 0x00 0x00 0x02 0x0c 0x21 0x04 0x00 0x00 0x00 0x03 0x0c 0x22 0x04 0x00 0x00 0x00 0x04 0x0c 0x23 0x04 0x800 0x00 0x00 0x01 0x0c 0x21 0x04 0x800 0x00 0x00 0x02 0x0c 0x22 0x04 0x800 0x00 0x00 0x03 0x0c 0x23 0x04 0x800 0x00 0x00 0x04 0x0c 0x20 0x04 0x1000 0x00 0x00 0x01 0x0c 0x22 0x04 0x1000 0x00 0x00 0x02 0x0c 0x23 0x04 0x1000 0x00 0x00 0x03 0x0c 0x20 0x04 0x1000 0x00 0x00 0x04 0x0c 0x21 0x04 0x1800 0x00 0x00 0x01 0x0c 0x23 0x04 0x1800 0x00 0x00 0x02 0x0c 0x20 0x04 0x1800 0x00 0x00 0x03 0x0c 0x21 0x04 0x1800 0x00 0x00 0x04 0x0c 0x22 0x04>; + iommu-map = <0x00 0x8000 0x00 0x00 0x00 0x8000 0x00 0xffff>; ranges = <0x1000000 0x00 0x00 0x00 0x3000000 0x00 0x10000 0x2000000 0x00 0x40000000 0x00 0x40000000 0x00 0x40000000 0x3000000 0x04 0x00 0x04 0x00 0x04 0x00>; reg = <0x00 0x30000000 0x00 0x10000000>; msi-parent = <0x0a>; diff --git a/platform/riscv64/qemu-aia/platform.mk b/platform/riscv64/qemu-aia/platform.mk index c53c431a..d2b35f27 100644 --- a/platform/riscv64/qemu-aia/platform.mk +++ b/platform/riscv64/qemu-aia/platform.mk @@ -25,11 +25,12 @@ QEMU_ARGS += -device loader,file="$(zone0_dtb)",addr=0x8f000000,force-raw=on QEMU_ARGS += -drive if=none,file=$(FSIMG1),id=hd0,format=raw # QEMU_ARGS += -device virtio-blk-device,drive=hd0,bus=virtio-mmio-bus.7 -QEMU_ARGS += -device virtio-blk-pci,drive=hd0,disable-legacy=on,disable-modern=off,addr=01.0 +QEMU_ARGS += -device virtio-blk-pci,drive=hd0,disable-legacy=on,disable-modern=off,addr=01.0,iommu_platform=on QEMU_ARGS += -device virtio-serial-device,bus=virtio-mmio-bus.6 -chardev pty,id=X10007000 -device virtconsole,chardev=X10007000 -S QEMU_ARGS += -drive if=none,file=$(FSIMG2),id=hd1,format=qcow2 # QEMU_ARGS += -device virtio-blk-device,drive=hd1,bus=virtio-mmio-bus.5 -QEMU_ARGS += -device virtio-blk-pci,drive=hd1,disable-legacy=on,disable-modern=off,addr=02.0 +QEMU_ARGS += -device virtio-blk-pci,drive=hd1,disable-legacy=on,disable-modern=off,addr=02.0,iommu_platform=on +# QEMU_ARGS += -d trace:riscv_iommu_* # ------------------------------------------------------------------- # QEMU_ARGS := -machine virt diff --git a/src/device/iommu/arm_smmu/mod.rs b/src/device/iommu/arm_smmu/mod.rs index b395483b..ae2d7ca4 100644 --- a/src/device/iommu/arm_smmu/mod.rs +++ b/src/device/iommu/arm_smmu/mod.rs @@ -84,7 +84,7 @@ impl Iommu for ArmSmmu { zone_id ); } - fn viommu_mmio_handler(&self, zone: &mut Zone, viommu_base: usize, viommu_size: usize) { + fn viommu_mmio_handler_register(&self, zone: &Zone, _viommu_base: usize, _viommu_size: usize) { todo!( "ArmSmmu viommu handler for zone id {} not implemented yet.", zone.id() diff --git a/src/device/iommu/dummy_iommu.rs b/src/device/iommu/dummy_iommu.rs index 4e7f072e..3b5d59a8 100644 --- a/src/device/iommu/dummy_iommu.rs +++ b/src/device/iommu/dummy_iommu.rs @@ -48,7 +48,7 @@ impl Iommu for DummyIommu { fn viommu_init(&self, zone_id: usize) { info!("No IOMMU implementation available, cannot initialize VIOMMU for Zone id {}", zone_id); } - fn viommu_mmio_handler(&self, zone: &mut Zone, _viommu_base: usize, _viommu_size: usize) { + fn viommu_mmio_handler_register(&self, zone: &Zone, _viommu_base: usize, _viommu_size: usize) { info!("No IOMMU implementation available, cannot handle VIOMMU MMIO for Zone id {}", zone.id()); } } diff --git a/src/device/iommu/intel_vtd/mod.rs b/src/device/iommu/intel_vtd/mod.rs index 9470724d..92c85b2c 100644 --- a/src/device/iommu/intel_vtd/mod.rs +++ b/src/device/iommu/intel_vtd/mod.rs @@ -85,7 +85,7 @@ impl Iommu for IntelVtd { zone_id ); } - fn viommu_mmio_handler(&self, zone: &mut Zone, viommu_base: usize, viommu_size: usize) { + fn viommu_mmio_handler_register(&self, zone: &Zone, _viommu_base: usize, _viommu_size: usize) { todo!( "IntelVtd viommu handler for zone id {} not implemented yet.", zone.id() diff --git a/src/device/iommu/iommu_trait.rs b/src/device/iommu/iommu_trait.rs index 9cef4b6c..4a1cf61f 100644 --- a/src/device/iommu/iommu_trait.rs +++ b/src/device/iommu/iommu_trait.rs @@ -36,6 +36,6 @@ pub(crate) trait Iommu { fn interrupt_handler(&self, irq_id: usize); /// Initialize the Virtual IOMMU for the Zone fn viommu_init(&self, zone_id: usize); - /// Virtual IOMMU MMIO handler for the Zone - fn viommu_mmio_handler(&self, zone: &mut Zone, viommu_base: usize, viommu_size: usize); + /// Register the Virtual IOMMU MMIO handler for the Zone + fn viommu_mmio_handler_register(&self, zone: &Zone, viommu_base: usize, viommu_size: usize); } diff --git a/src/device/iommu/mod.rs b/src/device/iommu/mod.rs index 548bc392..71b52c36 100644 --- a/src/device/iommu/mod.rs +++ b/src/device/iommu/mod.rs @@ -22,6 +22,7 @@ mod iommu_impl; mod iommu_trait; use crate::consts::MAX_ZONE_NUM; +use crate::zone::Zone; use iommu_impl::iommu_impl; use iommu_trait::Iommu; @@ -60,6 +61,22 @@ pub fn iommu_add_device_with_root_pt_addr(zone_id: usize, did: usize, root_pt_ad } } +/// Public interface for initializing the Virtual IOMMU for the Zone +pub(crate) fn viommu_init(zone_id: usize) { + match check_zone_id(zone_id) { + Ok(()) => { + iommu_impl().viommu_init(zone_id); + } + Err(e) => { + warn!("{}", e); + } + } +} + +pub(crate) fn viommu_mmio_handler_register(zone: &Zone, viommu_base: usize, viommu_size: usize) { + iommu_impl().viommu_mmio_handler_register(zone, viommu_base, viommu_size); +} + ///////////////////////////////////////////////////////////////////////// // Below pub apis are used for compatibility for old code(for x86_64) // // These apis will be replaced by IOMMU trait later. // diff --git a/src/device/iommu/riscv_iommu/cmd.rs b/src/device/iommu/riscv_iommu/cmd.rs index 128881b0..668e9636 100644 --- a/src/device/iommu/riscv_iommu/cmd.rs +++ b/src/device/iommu/riscv_iommu/cmd.rs @@ -16,9 +16,9 @@ #![allow(unused)] -const IOTINVAL_OPCODE: u8 = 1; -const IOFENCE_OPCODE: u8 = 2; -const IODIR_OPCODE: u8 = 3; +pub(super) const IOTINVAL_OPCODE: u8 = 1; +pub(super) const IOFENCE_OPCODE: u8 = 2; +pub(super) const IODIR_OPCODE: u8 = 3; // Dword0 and dword1 are the two 64-bit words that make up the command. #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] @@ -41,7 +41,7 @@ pub(super) enum IotInvalFunc { } impl IotInvalFunc { - fn raw(self) -> u8 { + pub(super) fn raw(self) -> u8 { match self { Self::Vma => 0, Self::Gvma => 1, @@ -103,7 +103,7 @@ pub(super) enum IoFenceFunc { } impl IoFenceFunc { - fn raw(self) -> u8 { + pub(super) fn raw(self) -> u8 { match self { Self::C => 0, } @@ -150,7 +150,7 @@ pub(super) enum IoDirFunc { } impl IoDirFunc { - fn raw(self) -> u8 { + pub(super) fn raw(self) -> u8 { match self { Self::InvalDdt => 0, Self::InvalPdt => 1, diff --git a/src/device/iommu/riscv_iommu/iommu_hw.rs b/src/device/iommu/riscv_iommu/iommu_hw.rs index 0b1386fd..c79faa8d 100644 --- a/src/device/iommu/riscv_iommu/iommu_hw.rs +++ b/src/device/iommu/riscv_iommu/iommu_hw.rs @@ -174,6 +174,88 @@ pub fn iommu_msi_pt_tlb_invalid(gscid: u16, msi_gpa: usize) { info!("RISC-V: iommu_msi_pt_tlb_invalid do nothing now"); } +/// IOMMU register enumeration, used to access IOMMU registers for viommu module. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(super) enum IommuReg { + Caps, + Fctl, + Cqcsr, + Fqcsr, + Ipsr, + Icvec, +} + +pub(super) fn iommu_read_reg(reg: IommuReg) -> u64 { + let iommu = get_iommu().lock(); + let hw = iommu.iommu(); + + match reg { + IommuReg::Caps => hw.caps.get(), + IommuReg::Fctl => hw.fctl.get() as u64, + IommuReg::Cqcsr => hw.cqcsr.get() as u64, + IommuReg::Fqcsr => hw.fqcsr.get() as u64, + IommuReg::Ipsr => hw.ipsr.get() as u64, + IommuReg::Icvec => hw.icvec.get(), + } +} + +pub(super) fn iommu_write_reg(reg: IommuReg, value: u64) { + let iommu = get_iommu().lock(); + let hw = iommu.iommu(); + + match reg { + IommuReg::Cqcsr => hw.cqcsr.set(value as u32), + IommuReg::Fqcsr => hw.fqcsr.set(value as u32), + IommuReg::Ipsr => hw.ipsr.set(value as u32), + IommuReg::Icvec => hw.icvec.set(value), + IommuReg::Caps | IommuReg::Fctl => { + warn!( + "RV IOMMU: ignore unsupported write to mirrored register {:?}", + reg + ); + } + } +} + +/// Add a raw command to physical IOMMU command queue. +pub(super) fn iommu_add_raw_command(command: RiscvIommuCommand) { + get_iommu().lock().rv_iommu_add_command(command); +} + +/// Device directory table field enumeration, used to access device directory table fields for viommu module. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(super) enum IommuDdtField { + Iohgatp, + Ta, + Fsc, +} + +/// Read a specific device directory table entry field. +pub(super) fn iommu_read_ddt_field(device_id: usize, field: IommuDdtField) -> Option { + let mut iommu = get_iommu().lock(); + let (entry, _) = iommu.ddt.get_or_alloc_leaf_entry(device_id)?; + + Some(match field { + IommuDdtField::Iohgatp => entry.iohgatp.get(), + IommuDdtField::Ta => entry.ta.get(), + IommuDdtField::Fsc => entry.fsc.get(), + }) +} + +/// Write a specific device directory table entry field. +pub(super) fn iommu_write_ddt_field(device_id: usize, field: IommuDdtField, value: u64) -> bool { + let mut iommu = get_iommu().lock(); + if let Some((entry, _)) = iommu.ddt.get_or_alloc_leaf_entry(device_id) { + match field { + IommuDdtField::Iohgatp => entry.iohgatp.set(value), + IommuDdtField::Ta => entry.ta.set(value), + IommuDdtField::Fsc => entry.fsc.set(value), + } + return true; + } + false +} + /// Initialize RISC-V IOMMU with hardware DDTP probing. fn riscv_iommu_init() { assert!( @@ -240,13 +322,17 @@ impl IommuHw { } fn rv_iommu_check_features(&self) { - let version = self.caps.read(IOMMU_CAPS::VERSION); // Stop and report failure if capabilities.version is not supported. - if version != IOMMU_CAPS::VERSION::VERSION_1_0.value { - panic!( - "RISC-V IOMMU unsupported version: {}, Please check the IOMMU version", - version - ); + match self.caps.read_as_enum(IOMMU_CAPS::VERSION) { + Some(IOMMU_CAPS::VERSION::Value::VERSION_1_0) => { + info!("RISC-V IOMMU version 1.0 supported"); + } + _ => { + panic!( + "RISC-V IOMMU unsupported version: {}, Please check the IOMMU version", + self.caps.read(IOMMU_CAPS::VERSION) + ); + } } // Note: here RISCV-IOMMU and CPU share the same stage-2 page table. let cpu_s2pt_lvl = unsafe { crate::arch::s2pt::GSTAGE_PT_LEVEL }; @@ -264,9 +350,6 @@ impl IommuHw { // Current DDT Entry only supports Extented-for todo!("To support Base-format DDT Entry"); } - if self.caps.read(IOMMU_CAPS::IGS) == IOMMU_CAPS::IGS::MSI.value { - warn!("RISC-V IOMMU HW does not support WSI generation"); - } } fn rv_iommu_init( diff --git a/src/device/iommu/riscv_iommu/mod.rs b/src/device/iommu/riscv_iommu/mod.rs index 5a5f1b8a..0bbe9ca9 100644 --- a/src/device/iommu/riscv_iommu/mod.rs +++ b/src/device/iommu/riscv_iommu/mod.rs @@ -30,12 +30,16 @@ mod cmd; mod iommu_hw; mod reg_bits; +#[cfg(feature = "viommu")] +mod viommu; use super::Iommu; use crate::zone::Zone; use cmd::*; use iommu_hw::*; pub use iommu_hw::{iommu_msi_pt_tlb_invalid, iommu_remove_device}; +#[cfg(feature = "viommu")] +use viommu::{viommu_init, viommu_mmio_handler_register}; pub(super) struct RiscvIommu; @@ -81,15 +85,18 @@ impl Iommu for RiscvIommu { ); } fn viommu_init(&self, zone_id: usize) { - todo!( - "RiscvIommu viommu init for zone id {} not implemented yet.", + #[cfg(feature = "viommu")] + viommu_init(zone_id); + #[cfg(not(feature = "viommu"))] + warn!( + "Virtual IOMMU is not enabled, skipping viommu init for zone {}", zone_id ); } - fn viommu_mmio_handler(&self, zone: &mut Zone, _viommu_base: usize, _viommu_size: usize) { - todo!( - "RiscvIommu viommu handler for zone id {} not implemented yet.", - zone.id() - ); + fn viommu_mmio_handler_register(&self, zone: &Zone, viommu_base: usize, viommu_size: usize) { + #[cfg(feature = "viommu")] + viommu_mmio_handler_register(zone, viommu_base, viommu_size); + #[cfg(not(feature = "viommu"))] + warn!("Virtual IOMMU is not enabled, skipping viommu mmio handler for zone {}, viommu_base {}, viommu_size {}.", zone.id(), viommu_base, viommu_size); } } diff --git a/src/device/iommu/riscv_iommu/reg_bits.rs b/src/device/iommu/riscv_iommu/reg_bits.rs index 93273589..d4b093b5 100644 --- a/src/device/iommu/riscv_iommu/reg_bits.rs +++ b/src/device/iommu/riscv_iommu/reg_bits.rs @@ -54,7 +54,7 @@ register_bitfields! { NL OFFSET(42) NUMBITS(1) [], S OFFSET(43) NUMBITS(1) [], ], - pub(super) IOMMU_DDTP [ // RISCV-IOMMU Spec Chap6.5 Device-directory table pointer + pub(super) IOMMU_DDTP [ // RISC-V IOMMU Spec Chap6.5 Device-directory table pointer MODE OFFSET(0) NUMBITS(4) [ OFF = 0, BARE = 1, @@ -65,7 +65,7 @@ register_bitfields! { BUSY OFFSET(4) NUMBITS(1) [], PPN OFFSET(10) NUMBITS(44) [] ], - pub(super) DDT_TC [ // RISCV-IOMMU Spec Chap3.1.3.1 Translation Control + pub(super) DDT_TC [ // RISC-V IOMMU Spec Chap3.1.3.1 Translation Control V OFFSET(0) NUMBITS(1) [], EN_ATS OFFSET(1) NUMBITS(1) [], EN_PRI OFFSET(2) NUMBITS(1) [], @@ -79,7 +79,7 @@ register_bitfields! { SBE OFFSET(10) NUMBITS(1) [], SXL OFFSET(11) NUMBITS(1) [] ], - pub(super) DDT_IOHGATP [ // RISCV-IOMMU Spec Chap3.1.3.2 IO hypervisor guest address translation and protection + pub(super) DDT_IOHGATP [ // RISC-V IOMMU Spec Chap3.1.3.2 IO hypervisor guest address translation and protection PPN OFFSET(0) NUMBITS(44) [], GSCID OFFSET(44) NUMBITS(16) [], MODE OFFSET(60) NUMBITS(4) [ @@ -88,12 +88,12 @@ register_bitfields! { SV57X4 = 10 ] ], - pub(super) DDT_TA [ // RISCV-IOMMU Spec Chap3.1.3.3 Translation attributes + pub(super) DDT_TA [ // RISC-V IOMMU Spec Chap3.1.3.3 Translation attributes PS_CID OFFSET(12) NUMBITS(20) [], RCID OFFSET(40) NUMBITS(12) [], MTYPE OFFSET(52) NUMBITS(12) [], ], - pub(super) DDT_FSC [ // RISCV-IOMMU Spec Chap3.1.3.4 First-stage context + pub(super) DDT_FSC [ // RISC-V IOMMU Spec Chap3.1.3.4 First-stage context MODE OFFSET(60) NUMBITS(4) [ BARE = 0, SV39 = 8, @@ -102,12 +102,12 @@ register_bitfields! { ], PPN OFFSET(0) NUMBITS(44) [] ], - pub(super) DDT_DIR [ // RISCV-IOMMU Spec Chap3.1.1 Non-leaf DDT entry + pub(super) DDT_DIR [ // RISC-V IOMMU Spec Chap3.1.1 Non-leaf DDT entry V OFFSET(0) NUMBITS(1) [], PPN OFFSET(10) NUMBITS(44) [] ], pub(super) IOMMU_XQB [ // RISC-V IOMMU Spec Chap6.6 Command-queue base - // RISC-V IOMMU Spec Chap6.9 Fault queue base + // RISC-V IOMMU Spec Chap6.9 Fault queue base LOG2SZ_1 OFFSET(0) NUMBITS(5) [], PPN OFFSET(10) NUMBITS(44) [] ], @@ -123,12 +123,12 @@ register_bitfields! { register_bitfields! { u32, - pub(super) IOMMU_FCTL [ // RISCV-IOMMU Spec Chap6.4 Features-control register + pub(super) IOMMU_FCTL [ // RISC-V IOMMU Spec Chap6.4 Features-control register BE OFFSET(0) NUMBITS(1) [], WSI OFFSET(1) NUMBITS(1) [], GXL OFFSET(2) NUMBITS(1) [], ], - pub(super) IOMMU_CQCSR [ // RISCV-IOMMU Spec Chap6.15 Command-queue CSR + pub(super) IOMMU_CQCSR [ // RISC-V IOMMU Spec Chap6.15 Command-queue CSR CQEN OFFSET(0) NUMBITS(1) [], CIE OFFSET(1) NUMBITS(1) [], CQMF OFFSET(8) NUMBITS(1) [], @@ -138,7 +138,7 @@ register_bitfields! { CQON OFFSET(16) NUMBITS(1) [], BUSY OFFSET(17) NUMBITS(1) [], ], - pub(super) IOMMU_FQCSR [ // RISCV-IOMMU Spec Chap6.16 Fault-queue CSR + pub(super) IOMMU_FQCSR [ // RISC-V IOMMU Spec Chap6.16 Fault-queue CSR FQEN OFFSET(0) NUMBITS(1) [], FIE OFFSET(1) NUMBITS(1) [], FQMF OFFSET(8) NUMBITS(1) [], @@ -146,7 +146,7 @@ register_bitfields! { FQON OFFSET(16) NUMBITS(1) [], BUSY OFFSET(17) NUMBITS(1) [], ], - pub(super) IOMMU_IPSR [ // RISCV-IOMMU Spec Chap6.18 Interrupt pending status register + pub(super) IOMMU_IPSR [ // RISC-V IOMMU Spec Chap6.18 Interrupt pending status register CIP OFFSET(0) NUMBITS(1) [], FIP OFFSET(1) NUMBITS(1) [], PMIP OFFSET(2) NUMBITS(1) [], diff --git a/src/device/iommu/riscv_iommu/viommu.rs b/src/device/iommu/riscv_iommu/viommu.rs new file mode 100644 index 00000000..2e603b66 --- /dev/null +++ b/src/device/iommu/riscv_iommu/viommu.rs @@ -0,0 +1,809 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// Jingyu Liu for IommuMode { + type Error = (); + + fn try_from(value: usize) -> Result { + match value as u8 { + 0 => Ok(Self::Off), + 1 => Ok(Self::Bare), + 2 => Ok(Self::Ddt1Lvl), + 3 => Ok(Self::Ddt2Lvl), + 4 => Ok(Self::Ddt3Lvl), + _ => Err(()), + } + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum ViommuRegion { + Regs, + Ddt, +} + +impl ViommuRegion { + fn label(self) -> &'static str { + match self { + Self::Regs => "", + Self::Ddt => "ddt ", + } + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum MmioAccessType { + Read, + Write, +} + +impl MmioAccessType { + fn from_is_write(is_write: bool) -> Self { + if is_write { + Self::Write + } else { + Self::Read + } + } +} + +lazy_static! { + /// Global viommu array, one viommu instance for each zone. + static ref VIOMMU_ARR: Mutex>>> = + Mutex::new(vec![None; MAX_ZONE_NUM]); +} + +fn validate_zone_id(zone_id: usize) -> bool { + if zone_id >= MAX_ZONE_NUM { + error!("Invalid zone id: {}", zone_id); + return false; + } + true +} + +/// Create one viommu instance for target zone. +pub(super) fn viommu_init(zone_id: usize) { + if !validate_zone_id(zone_id) { + return; + } + + let mut viommu_arr = VIOMMU_ARR.lock(); + if viommu_arr[zone_id].is_some() { + warn!("Zone {}'s Virtual IOMMU already initialized.", zone_id); + return; + } + viommu_arr[zone_id] = Some(Arc::new(VirtualIommu::new())); + info!("Zone {}'s Virtual IOMMU initialized.", zone_id); +} + +/// Register viommu mmio handler for target zone. +pub(super) fn viommu_mmio_handler_register(zone: &Zone, viommu_base: usize, viommu_size: usize) { + zone.write() + .mmio_region_register(viommu_base, viommu_size, viommu_emul_handler, zone.id()); +} + +/// Handle Zone's iommu mmio access. +fn viommu_emul_handler(mmio: &mut MMIOAccess, zone_id: usize) -> HvResult { + viommu_mmio_emul_handler(mmio, zone_id, ViommuRegion::Regs) +} + +fn get_viommu_by_zone_id(zone_id: usize) -> Option> { + if !validate_zone_id(zone_id) { + return None; + } + let viommu_array = VIOMMU_ARR.lock(); + // ref, don't move viommu out of the array + match &viommu_array[zone_id] { + Some(viommu) => Some(Arc::clone(viommu)), + None => { + error!("VirtualIommu for Zone {} does not exist!", zone_id); + None + } + } +} + +/// Handle Zone's iommu ddt mmio access. +fn viommu_ddt_emul_handler(mmio: &mut MMIOAccess, zone_id: usize) -> HvResult { + viommu_mmio_emul_handler(mmio, zone_id, ViommuRegion::Ddt) +} + +fn viommu_mmio_emul_handler( + mmio: &mut MMIOAccess, + zone_id: usize, + region: ViommuRegion, +) -> HvResult { + let Some(viommu) = get_viommu_by_zone_id(zone_id) else { + warn!( + "vIOMMU {}mmio access for non-initialized zone {}", + region.label(), + zone_id + ); + return Ok(()); + }; + let Some(zone) = find_zone(zone_id) else { + warn!( + "vIOMMU {}mmio access for unknown zone {}", + region.label(), + zone_id + ); + return Ok(()); + }; + let access = MmioAccessType::from_is_write(mmio.is_write); + let value = match region { + ViommuRegion::Regs => { + viommu.viommu_emul_access(&zone, mmio.address, mmio.size, mmio.value, access) + } + ViommuRegion::Ddt => { + viommu.viommu_ddt_emul_access(&zone, mmio.address, mmio.size, mmio.value, access) + } + }; + if access == MmioAccessType::Read { + mmio.value = value as usize; + } + Ok(()) +} + +/// Virtual IOMMU device structure +struct VirtualIommu { + /// Multithread safe inner structure + inner: Mutex, +} + +/// Virtual IOMMU +struct VirtualIommuInner { + regs: ViommuRegs, + cq: CommandQueueState, + fq: FaultQueueState, + ddt: DdtShadowState, +} + +struct ViommuRegs { + caps: u64, + fctl: u32, + ddtp: u64, +} + +struct CommandQueueState { + base: u64, + head: u32, + tail: u32, + num_entries: u32, + gpa: u64, +} + +struct FaultQueueState { + base: u64, + head: u32, + tail: u32, +} + +struct DdtShadowState { + tc: Vec, +} + +struct VcpuSuspendGuard { + cpu_set: CpuSet, +} + +impl VcpuSuspendGuard { + fn new(cpu_set: CpuSet) -> Self { + send_event_to_all(cpu_set, 0, IPI_EVENT_VCPU_SUSPEND); + riscv::asm::fence(); + wait_for_other_vcpus_suspend(cpu_set); + Self { cpu_set } + } +} + +impl Drop for VcpuSuspendGuard { + fn drop(&mut self) { + send_event_to_all(self.cpu_set, 0, IPI_EVENT_VCPU_RESUME); + signal_other_vcpus_resume(self.cpu_set); + } +} + +impl VirtualIommu { + fn new() -> Self { + Self { + inner: Mutex::new(VirtualIommuInner::new()), + } + } + + /// vIOMMU emul access. + fn viommu_emul_access( + &self, + zone: &Zone, + offset: usize, + size: usize, + value: usize, + access: MmioAccessType, + ) -> u64 { + self.inner + .lock() + .viommu_emul_access(zone, offset, size, value, access) + } + + /// vIOMMU ddt emul access. + fn viommu_ddt_emul_access( + &self, + zone: &Zone, + offset: usize, + size: usize, + value: usize, + access: MmioAccessType, + ) -> u64 { + self.inner + .lock() + .viommu_ddt_emul_access(zone, offset, size, value, access) + } +} + +impl ViommuRegs { + fn new() -> Self { + let mut caps = iommu_read_reg(IommuReg::Caps); + caps &= !VIOMMU_MASKED_CAPS; + // Don't support MSI irq generated by IOMMU self. + caps |= IOMMU_CAPS::IGS::WSI.value; + + Self { + caps, + fctl: 0, + ddtp: 0, + } + } +} + +impl CommandQueueState { + fn new() -> Self { + Self { + base: 0, + head: 0, + tail: 0, + num_entries: 0, + gpa: 0, + } + } +} + +impl FaultQueueState { + fn new() -> Self { + Self { + base: 0, + head: 0, + tail: 0, + } + } +} + +impl DdtShadowState { + fn new() -> Self { + Self { + tc: vec![0; MAX_VIOMMU_DDT_DEVICES], + } + } +} + +impl VirtualIommuInner { + fn new() -> Self { + Self { + regs: ViommuRegs::new(), + cq: CommandQueueState::new(), + fq: FaultQueueState::new(), + ddt: DdtShadowState::new(), + } + } + + /// vIOMMU emul access inner. + fn viommu_emul_access( + &mut self, + zone: &Zone, + offset: usize, + size: usize, + value: usize, + access: MmioAccessType, + ) -> u64 { + // The current emulation dispatches by register range; access width is + // accepted as provided by the common MMIO layer. + let _ = size; + let zone_id = zone.id(); + match access { + MmioAccessType::Read => self.read_reg_access(offset), + MmioAccessType::Write => { + self.write_reg_access(zone, zone_id, offset, value); + 0 + } + } + } + + fn read_reg_access(&self, offset: usize) -> u64 { + match offset { + REG_CAPS_START..=REG_CAPS_END => { + info!("vIOMMU caps: {:#x}", self.regs.caps); + self.regs.caps + } + REG_FCTL_START..=REG_FCTL_END => self.regs.fctl as u64, + REG_DDTP_START..=REG_DDTP_END => self.regs.ddtp, + REG_CQB_START..=REG_CQB_END => self.cq.base, + REG_CQH_START..=REG_CQH_END => self.cq.head as u64, + REG_CQT_START..=REG_CQT_END => self.cq.tail as u64, + REG_FQB_START..=REG_FQB_END => self.fq.base, + REG_FQH_START..=REG_FQH_END => self.fq.head as u64, + REG_FQT_START..=REG_FQT_END => self.fq.tail as u64, + // We only keep minimum compatibility for CSR mirrors now. + REG_CQCSR_START..=REG_CQCSR_END => iommu_read_reg(IommuReg::Cqcsr), + REG_FQCSR_START..=REG_FQCSR_END => iommu_read_reg(IommuReg::Fqcsr), + REG_IPSR_START..=REG_IPSR_END => iommu_read_reg(IommuReg::Ipsr), + REG_ICVEC_START..=REG_ICVEC_END => iommu_read_reg(IommuReg::Icvec), + _ => { + warn!("vIOMMU mmio access offset {:#x} not supported", offset); + 0 + } + } + } + + fn write_reg_access(&mut self, zone: &Zone, zone_id: usize, offset: usize, value: usize) { + match offset { + REG_CAPS_START..=REG_CAPS_END => { + error!("Capabilities register is read-only!"); + } + REG_FCTL_START..=REG_FCTL_END => { + let host_fctl = iommu_read_reg(IommuReg::Fctl); + if value != host_fctl as usize { + error!( + "vIOMMU fctl write value {:#x} not match host fctl {:#x}!", + value, host_fctl + ); + } else { + self.regs.fctl = value as u32; + } + } + REG_DDTP_START..=REG_DDTP_END => { + self.handle_ddtp_write(zone, value); + } + REG_CQB_START..=REG_CQB_END => self.handle_cqb_write(value), + REG_CQH_START..=REG_CQH_END => { + error!("vIOMMU cqh is read-only!"); + } + REG_CQT_START..=REG_CQT_END => self.handle_cqt_write(zone_id, value), + REG_FQB_START..=REG_FQB_END => { + self.fq.base = value as u64; + } + REG_FQH_START..=REG_FQH_END => { + self.fq.head = value as u32; + } + REG_FQT_START..=REG_FQT_END => { + self.fq.tail = value as u32; + } + // We only keep minimum compatibility for CSR mirrors now. + REG_CQCSR_START..=REG_CQCSR_END => iommu_write_reg(IommuReg::Cqcsr, value as u64), + REG_FQCSR_START..=REG_FQCSR_END => iommu_write_reg(IommuReg::Fqcsr, value as u64), + REG_IPSR_START..=REG_IPSR_END => iommu_write_reg(IommuReg::Ipsr, value as u64), + REG_ICVEC_START..=REG_ICVEC_END => iommu_write_reg(IommuReg::Icvec, value as u64), + _ => { + warn!("vIOMMU mmio access offset {:#x} not supported", offset); + } + } + } + + fn handle_ddtp_write(&mut self, zone: &Zone, value: usize) -> bool { + info!("vIOMMU ddtp write value: {:#x}", value); + let mode_raw = ((value as u64 & IOMMU_DDTP::MODE.mask) >> IOMMU_DDTP::MODE.shift) as usize; + match IommuMode::try_from(mode_raw) { + Ok(mode) => { + info!("Guest try to set vIOMMU mode to {:?}", mode); + match mode { + IommuMode::Off | IommuMode::Bare => { + self.regs.ddtp = value as u64; + } + IommuMode::Ddt1Lvl => { + if !self.handle_ddt1lvl_mode(zone, value) { + return false; + } + self.regs.ddtp = value as u64; + } + IommuMode::Ddt2Lvl | IommuMode::Ddt3Lvl => { + info!("vIOMMU ddtp mode {:?} not supported yet!", mode); + } + } + } + Err(_) => { + error!("vIOMMU ddtp mode {:#x} not supported!", mode_raw as u8); + } + } + true + } + + fn handle_ddt1lvl_mode(&mut self, zone: &Zone, value: usize) -> bool { + let mut zone_inner = zone.write(); + let ppn = ((value as u64 & IOMMU_DDTP::PPN.mask) >> IOMMU_DDTP::PPN.shift) as usize; + let ddt_gpa = ppn << DDTP_PPN_TO_GPA_SHIFT; + info!("vIOMMU's DDT Table GPA: {:#x}", ddt_gpa); + + let Some(region) = zone_inner.gpm_mut().get_region(ddt_gpa as GuestPhysAddr) else { + error!("vIOMMU ddtp region not found in gpm!"); + return false; + }; + + // We unmap this page to trigger a page fault when the guest accesses it. + let cpu_set = zone_inner.cpu_set(); + let _resume_guard = VcpuSuspendGuard::new(cpu_set); + + let gpm = zone_inner.gpm_mut(); + if let Err(err) = gpm.delete(region.start, region.size) { + error!("vIOMMU ddtp region delete failed: {:?}", err); + return false; + } + + let region_start = region.start; + let region_end = region.start + region.size; + if region_start < ddt_gpa { + let left_region = MemoryRegion::new_with_offset_mapper( + region_start as GuestPhysAddr, + region_start, + ddt_gpa - region_start, + region.flags, + ); + if let Err(err) = gpm.insert(left_region) { + error!("vIOMMU ddtp left region insert failed: {:?}", err); + return false; + } + } + if region_end > ddt_gpa + VIOMMU_DDT1LVL_SIZE { + // For 1LVL DDT, the region size is one 4KiB page. + let right_start = ddt_gpa + VIOMMU_DDT1LVL_SIZE; + let right_region = MemoryRegion::new_with_offset_mapper( + right_start as GuestPhysAddr, + right_start, + region_end - right_start, + region.flags, + ); + if let Err(err) = gpm.insert(right_region) { + error!("vIOMMU ddtp right region insert failed: {:?}", err); + return false; + } + } + info!("gpm after unmap vIOMMU ddtp region: {:#x?}", gpm); + // SAFETY: flush stage-2 translations after changing guest mappings. + unsafe { riscv_h::asm::hfence_gvma(0, 0) }; + // Keep zone_id as 0 for now to preserve current behavior. + zone_inner.mmio_region_register(ddt_gpa, VIOMMU_DDT1LVL_SIZE, viommu_ddt_emul_handler, 0); + true + } + + fn handle_cqb_write(&mut self, value: usize) { + let value_u64 = value as u64; + let ppn = (value_u64 & IOMMU_XQB::PPN.mask) >> IOMMU_XQB::PPN.shift; + let log2sz_1 = (value_u64 & IOMMU_XQB::LOG2SZ_1.mask) >> IOMMU_XQB::LOG2SZ_1.shift; + let mut num_entries = 1u64 << (log2sz_1 + 1); + let mut new_value = value_u64; + if num_entries > CQ_MAX_ENTRIES { + new_value = (value_u64 & !IOMMU_XQB::LOG2SZ_1.mask) | CQ_LOG2SZ_1_CAP; + num_entries = CQ_MAX_ENTRIES; + } + self.cq.num_entries = num_entries as u32; + self.cq.gpa = ppn << DDTP_PPN_TO_GPA_SHIFT; + self.cq.base = new_value; + info!( + "vIOMMU cqb: {:#x}, cq_gpa: {:#x}, cq_num_entries: {}", + self.cq.base, self.cq.gpa, self.cq.num_entries + ); + } + + fn handle_cqt_write(&mut self, zone_id: usize, value: usize) { + if self.cq.num_entries == 0 { + warn!("vIOMMU cqt write before cqb init, ignoring"); + return; + } + + let new_tail = value as u32 % self.cq.num_entries; + let mut cqh = self.cq.head; + while cqh != new_tail { + let cqe_addr = (self.cq.gpa as usize) + + (cqh as usize % self.cq.num_entries as usize) * CQ_ENTRY_SIZE; + let (dword0, dword1) = Self::read_cq_entry(cqe_addr); + self.dispatch_cq_command(zone_id, dword0, dword1); + cqh = (cqh + 1) % self.cq.num_entries; + } + self.cq.tail = new_tail; + self.cq.head = new_tail; + } + + fn dispatch_cq_command(&self, zone_id: usize, dword0: u64, dword1: u64) { + let opcode = dword0 & CMD_OPCODE_MASK; + let func3 = (dword0 >> CMD_FUNC3_SHIFT) & CMD_FUNC3_MASK; + match opcode { + op if op == u64::from(IOTINVAL_OPCODE) => { + let vma_raw = u64::from(IotInvalFunc::Vma.raw()); + let gvma_raw = u64::from(IotInvalFunc::Gvma.raw()); + if func3 == vma_raw { + // Guest VMA command is always bound to this zone's GSCID. + let mut out0 = dword0; + out0 |= 1u64 << IOTINVAL_GV_SHIFT; + out0 &= !(IOTINVAL_GSCID_MASK << IOTINVAL_GSCID_SHIFT); + out0 |= ((zone_id as u64) & IOTINVAL_GSCID_MASK) << IOTINVAL_GSCID_SHIFT; + iommu_add_raw_command(RiscvIommuCommand { + dword0: out0, + dword1, + }); + } else if func3 == gvma_raw { + iommu_add_raw_command(RiscvIommuCommand { dword0, dword1 }); + } else { + warn!("vIOMMU IOTINVAL func3={} not supported", func3); + } + } + op if op == u64::from(IOFENCE_OPCODE) => { + if func3 != u64::from(IoFenceFunc::C.raw()) { + warn!("vIOMMU IOFENCE func3 unsupported"); + } else { + iommu_add_raw_command(RiscvIommuCommand { dword0, dword1 }); + } + } + op if op == u64::from(IODIR_OPCODE) => { + let inval_ddt_raw = u64::from(IoDirFunc::InvalDdt.raw()); + let inval_pdt_raw = u64::from(IoDirFunc::InvalPdt.raw()); + if func3 == inval_ddt_raw { + iommu_add_raw_command(RiscvIommuCommand { dword0, dword1 }); + } else if func3 == inval_pdt_raw { + warn!("vIOMMU IODIR INVAL_PDT not supported yet"); + } else { + warn!("vIOMMU IODIR func3={} not supported", func3); + } + } + _ => warn!("vIOMMU unknown CQ opcode {}", opcode), + } + } + + fn read_cq_entry(cqe_addr: usize) -> (u64, u64) { + // SAFETY: CQE address comes from guest-provided CQB/CQH state and + // current implementation assumes this memory is directly readable. + let bytes: [u8; 16] = unsafe { ptr::read(cqe_addr as *const [u8; 16]) }; + let dword0 = u64::from_le_bytes(bytes[0..8].try_into().unwrap()); + let dword1 = u64::from_le_bytes(bytes[8..16].try_into().unwrap()); + (dword0, dword1) + } + + /// Handle Zone's iommu ddt mmio access. (now only support 1LVL DDT) + fn viommu_ddt_emul_access( + &mut self, + zone: &Zone, + offset: usize, + size: usize, + value: usize, + access: MmioAccessType, + ) -> u64 { + // DDT entry fields are 8 bytes wide; keep the current tolerant behavior + // and dispatch by field offset rather than rejecting other widths here. + let _ = size; + // A hypervisor that provides such an emulated IOMMU to the guest may + // 1.retain control of the MSI page tables used to direct MSIs to guest interrupt files + // 2.clear the MSI_FLAT and MSI_MRIF fields of the emulated capabilities register. + + // So its Device-Context Format is below: + // - Translation Control (tc) 8bytes + // - IO Hypervisor guest address translation and protection (iohgatp) 8bytes + // - Translation-attributes (ta) 8bytes + // - First-stage-context (fsc) 8bytes + + let ddt_index = offset / DDT_ENTRY_SIZE; // each entry is 32 bytes (base format) + if ddt_index == 0 || ddt_index >= self.ddt.tc.len() { + warn!( + "ddt_index {} is invalid for zone {}, ignore the access", + ddt_index, + zone.id() + ); + return 0; + } + + match offset % DDT_ENTRY_SIZE { + DDT_FIELD_TC_START..=DDT_FIELD_TC_END => { + self.handle_tc_access(ddt_index, value, access) + } + DDT_FIELD_IOHGATP_START..=DDT_FIELD_IOHGATP_END => { + self.handle_iohgatp_access(ddt_index, value, access) + } + DDT_FIELD_TA_START..=DDT_FIELD_TA_END => { + self.handle_ta_access(ddt_index, value, access) + } + DDT_FIELD_FSC_START..=DDT_FIELD_FSC_END => { + self.handle_fsc_access(ddt_index, value, access) + } + _ => { + error!( + "Unexpected offset value: {:#x}. This should never happen!", + offset + ); + 0 + } + } + } + + fn handle_tc_access(&mut self, ddt_index: usize, value: usize, access: MmioAccessType) -> u64 { + match access { + MmioAccessType::Read => self.ddt.tc[ddt_index], + MmioAccessType::Write => { + if value & !DDT_TC_ALLOWED_WRITE_MASK != 0 { + unimplemented!( + "vIOMMU ddt entry {} tc value {:#x} not supported!", + ddt_index, + value + ); + } + self.ddt.tc[ddt_index] = value as u64; + 0 + } + } + } + + fn handle_iohgatp_access(&self, ddt_index: usize, value: usize, access: MmioAccessType) -> u64 { + match access { + MmioAccessType::Read => { + iommu_read_ddt_field(ddt_index, IommuDdtField::Iohgatp).unwrap_or(0) + } + MmioAccessType::Write => { + if value != 0 { + error!( + "vIOMMU ddt entry {} iohgatp value {:#x} not supported!", + ddt_index, value + ); + } + 0 + } + } + } + + fn handle_ta_access(&self, ddt_index: usize, value: usize, access: MmioAccessType) -> u64 { + match access { + MmioAccessType::Read => { + warn!("vIOMMU ddt entry {} ta read not supported yet", ddt_index); + 0 + } + MmioAccessType::Write => { + if !iommu_write_ddt_field(ddt_index, IommuDdtField::Ta, value as u64) { + warn!("vIOMMU ddt entry {} ta write ignored", ddt_index); + } + 0 + } + } + } + + fn handle_fsc_access(&self, ddt_index: usize, value: usize, access: MmioAccessType) -> u64 { + match access { + MmioAccessType::Read => { + iommu_read_ddt_field(ddt_index, IommuDdtField::Fsc).unwrap_or(0) + } + MmioAccessType::Write => { + let fsc = value as u64; + let mode = (fsc & DDT_FSC::MODE.mask) >> DDT_FSC::MODE.shift; + let mode_ok = mode == DDT_FSC::MODE::BARE.value + || mode == DDT_FSC::MODE::SV39.value + || mode == DDT_FSC::MODE::SV48.value + || mode == DDT_FSC::MODE::SV57.value; + if !mode_ok { + error!( + "vIOMMU ddt entry {} fsc mode {:#x} not supported!", + ddt_index, mode + ); + return 0; + } + if !iommu_write_ddt_field(ddt_index, IommuDdtField::Fsc, fsc) { + warn!("vIOMMU ddt entry {} fsc write ignored", ddt_index); + } + 0 + } + } + } +} diff --git a/src/device/irqchip/aia/mod.rs b/src/device/irqchip/aia/mod.rs index b0ba984e..dbd721c7 100644 --- a/src/device/irqchip/aia/mod.rs +++ b/src/device/irqchip/aia/mod.rs @@ -171,7 +171,7 @@ impl Zone { } /// Initial the virtual IMSIC related to thiz Zone. - pub fn vimsic_init(&mut self, _config: &HvZoneConfig) { + pub fn vimsic_init(&self, _config: &HvZoneConfig) { info!("Zone {} vIMSIC init", self.id()); let msi_pt = vimsic::vimsic_init(self, IMSIC_S_BASE, IMSIC_GUEST_NUM); unsafe { diff --git a/src/device/irqchip/aia/vimsic.rs b/src/device/irqchip/aia/vimsic.rs index f066886f..c4974766 100644 --- a/src/device/irqchip/aia/vimsic.rs +++ b/src/device/irqchip/aia/vimsic.rs @@ -17,6 +17,7 @@ use alloc::vec::Vec; use crate::consts::PAGE_SIZE; +#[cfg(feature = "iommu")] use crate::device::iommu::iommu_msi_pt_tlb_invalid; use crate::memory::Frame; use crate::memory::GuestPhysAddr; @@ -77,6 +78,7 @@ fn msi_pt_fill( // Fill MSI PTE to MSI PT. frame.as_slice_mut()[off..off + MSI_PTE_BYTES].copy_from_slice(&pte); // After change the MSI PTE, invalidate the related TLBs in IOMMU. + #[cfg(feature = "iommu")] iommu_msi_pt_tlb_invalid(zone_id as u16, imsic_gpa as usize); } } @@ -94,7 +96,7 @@ fn msi_pt_fill( /// Maps guest IMSIC interrupt files and builds a one-page IOMMU MSI translation table for this zone. /// /// Returns the physical frame backing the MSI PTE array (`start_paddr()` for IOMMU programming). -pub fn vimsic_init(zone: &mut Zone, imsic_base: usize, guest_num: usize) -> Frame { +pub fn vimsic_init(zone: &Zone, imsic_base: usize, guest_num: usize) -> Frame { let size = crate::memory::PAGE_SIZE; let cpu_ids: Vec<_> = zone.cpu_set().iter().collect(); diff --git a/src/memory/mm.rs b/src/memory/mm.rs index 26adab8a..dd8fc3ba 100644 --- a/src/memory/mm.rs +++ b/src/memory/mm.rs @@ -165,6 +165,20 @@ where Ok(()) } + /// Get the memory region which contains the `start` address. + pub fn get_region(&self, start: PT::VA) -> Option> { + // Find a region that completely includes the range [start, end) + for (key, region) in self.regions.range(..=start) { + let region_start = *key; + let region_end = region_start.into() + region.size; + // Return the region contains the start address + if region_start <= start && region_end > start.into() { + return Some(region.clone()); + } + } + None + } + /// Find and remove memory region which starts from `start` and `size` pub fn delete(&mut self, start: PT::VA, size: usize) -> HvResult { if let Entry::Occupied(e) = self.regions.entry(start) { diff --git a/src/zone.rs b/src/zone.rs index 158d8c67..3052a4f6 100644 --- a/src/zone.rs +++ b/src/zone.rs @@ -400,6 +400,14 @@ pub fn zone_create(config: &HvZoneConfig) -> HvResult> { ); } + #[cfg(feature = "viommu")] + { + use crate::platform::{IOMMU_SYS_BASE, IOMMU_SYS_SIZE}; + // Create viommu instance and register mmio handler for target zone. + crate::device::iommu::viommu_init(zone_id); + crate::device::iommu::viommu_mmio_handler_register(&zone, IOMMU_SYS_BASE, IOMMU_SYS_SIZE); + } + // #[cfg(target_arch = "aarch64")] // zone.ivc_init(config.ivc_config()); From 79351964029ec4b2c32c6a431dd066d4006dc5e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=99=AF=E5=AE=87?= <2537738252@qq.com> Date: Fri, 15 May 2026 17:11:00 +0800 Subject: [PATCH 2/2] riscv-iommu: add viommu reset logic --- src/device/iommu/arm_smmu/mod.rs | 6 ++ src/device/iommu/dummy_iommu.rs | 3 + src/device/iommu/intel_vtd/mod.rs | 6 ++ src/device/iommu/iommu_trait.rs | 2 + src/device/iommu/mod.rs | 12 ++++ src/device/iommu/riscv_iommu/mod.rs | 11 ++- src/device/iommu/riscv_iommu/viommu.rs | 94 +++++++++++++++++++------- src/hypercall/mod.rs | 4 ++ 8 files changed, 114 insertions(+), 24 deletions(-) diff --git a/src/device/iommu/arm_smmu/mod.rs b/src/device/iommu/arm_smmu/mod.rs index ae2d7ca4..8f113f27 100644 --- a/src/device/iommu/arm_smmu/mod.rs +++ b/src/device/iommu/arm_smmu/mod.rs @@ -84,6 +84,12 @@ impl Iommu for ArmSmmu { zone_id ); } + fn viommu_remove(&self, zone_id: usize) { + todo!( + "ArmSmmu viommu remove for zone id {} not implemented yet.", + zone_id + ); + } fn viommu_mmio_handler_register(&self, zone: &Zone, _viommu_base: usize, _viommu_size: usize) { todo!( "ArmSmmu viommu handler for zone id {} not implemented yet.", diff --git a/src/device/iommu/dummy_iommu.rs b/src/device/iommu/dummy_iommu.rs index 3b5d59a8..125d3eb9 100644 --- a/src/device/iommu/dummy_iommu.rs +++ b/src/device/iommu/dummy_iommu.rs @@ -48,6 +48,9 @@ impl Iommu for DummyIommu { fn viommu_init(&self, zone_id: usize) { info!("No IOMMU implementation available, cannot initialize VIOMMU for Zone id {}", zone_id); } + fn viommu_remove(&self, zone_id: usize) { + info!("No IOMMU implementation available, cannot remove VIOMMU for Zone id {}", zone_id); + } fn viommu_mmio_handler_register(&self, zone: &Zone, _viommu_base: usize, _viommu_size: usize) { info!("No IOMMU implementation available, cannot handle VIOMMU MMIO for Zone id {}", zone.id()); } diff --git a/src/device/iommu/intel_vtd/mod.rs b/src/device/iommu/intel_vtd/mod.rs index 92c85b2c..c9910746 100644 --- a/src/device/iommu/intel_vtd/mod.rs +++ b/src/device/iommu/intel_vtd/mod.rs @@ -85,6 +85,12 @@ impl Iommu for IntelVtd { zone_id ); } + fn viommu_remove(&self, zone_id: usize) { + todo!( + "IntelVtd viommu remove for zone id {} not implemented yet.", + zone_id + ); + } fn viommu_mmio_handler_register(&self, zone: &Zone, _viommu_base: usize, _viommu_size: usize) { todo!( "IntelVtd viommu handler for zone id {} not implemented yet.", diff --git a/src/device/iommu/iommu_trait.rs b/src/device/iommu/iommu_trait.rs index 4a1cf61f..c992905b 100644 --- a/src/device/iommu/iommu_trait.rs +++ b/src/device/iommu/iommu_trait.rs @@ -36,6 +36,8 @@ pub(crate) trait Iommu { fn interrupt_handler(&self, irq_id: usize); /// Initialize the Virtual IOMMU for the Zone fn viommu_init(&self, zone_id: usize); + /// Remove the Virtual IOMMU for the Zone + fn viommu_remove(&self, zone_id: usize); /// Register the Virtual IOMMU MMIO handler for the Zone fn viommu_mmio_handler_register(&self, zone: &Zone, viommu_base: usize, viommu_size: usize); } diff --git a/src/device/iommu/mod.rs b/src/device/iommu/mod.rs index 71b52c36..37cd69d0 100644 --- a/src/device/iommu/mod.rs +++ b/src/device/iommu/mod.rs @@ -73,6 +73,18 @@ pub(crate) fn viommu_init(zone_id: usize) { } } +/// Public interface for removing the Virtual IOMMU for the Zone +pub(crate) fn viommu_remove(zone_id: usize) { + match check_zone_id(zone_id) { + Ok(()) => { + iommu_impl().viommu_remove(zone_id); + } + Err(e) => { + warn!("{}", e); + } + } +} + pub(crate) fn viommu_mmio_handler_register(zone: &Zone, viommu_base: usize, viommu_size: usize) { iommu_impl().viommu_mmio_handler_register(zone, viommu_base, viommu_size); } diff --git a/src/device/iommu/riscv_iommu/mod.rs b/src/device/iommu/riscv_iommu/mod.rs index 0bbe9ca9..2284341f 100644 --- a/src/device/iommu/riscv_iommu/mod.rs +++ b/src/device/iommu/riscv_iommu/mod.rs @@ -39,7 +39,7 @@ use cmd::*; use iommu_hw::*; pub use iommu_hw::{iommu_msi_pt_tlb_invalid, iommu_remove_device}; #[cfg(feature = "viommu")] -use viommu::{viommu_init, viommu_mmio_handler_register}; +use viommu::{viommu_init, viommu_mmio_handler_register, viommu_remove}; pub(super) struct RiscvIommu; @@ -93,6 +93,15 @@ impl Iommu for RiscvIommu { zone_id ); } + fn viommu_remove(&self, zone_id: usize) { + #[cfg(feature = "viommu")] + viommu_remove(zone_id); + #[cfg(not(feature = "viommu"))] + warn!( + "Virtual IOMMU is not enabled, skipping viommu remove for zone {}", + zone_id + ); + } fn viommu_mmio_handler_register(&self, zone: &Zone, viommu_base: usize, viommu_size: usize) { #[cfg(feature = "viommu")] viommu_mmio_handler_register(zone, viommu_base, viommu_size); diff --git a/src/device/iommu/riscv_iommu/viommu.rs b/src/device/iommu/riscv_iommu/viommu.rs index 2e603b66..f22edb62 100644 --- a/src/device/iommu/riscv_iommu/viommu.rs +++ b/src/device/iommu/riscv_iommu/viommu.rs @@ -25,7 +25,7 @@ use super::iommu_hw::{ use super::reg_bits::{DDT_FSC, DDT_TC, IOMMU_CAPS, IOMMU_DDTP, IOMMU_XQB}; use crate::consts::MAX_ZONE_NUM; use crate::consts::{IPI_EVENT_VCPU_RESUME, IPI_EVENT_VCPU_SUSPEND}; -use crate::cpu_data::{signal_other_vcpus_resume, wait_for_other_vcpus_suspend, CpuSet}; +use crate::cpu_data::{signal_other_vcpus_resume, wait_for_other_vcpus_suspend}; use crate::error::HvResult; use crate::event::send_event_to_all; use crate::memory::{GuestPhysAddr, MMIOAccess, MemoryRegion}; @@ -191,6 +191,27 @@ pub(super) fn viommu_init(zone_id: usize) { info!("Zone {}'s Virtual IOMMU initialized.", zone_id); } +/// Remove one viommu instance for target zone and clear its physical DDT side effects. +pub(super) fn viommu_remove(zone_id: usize) { + if !validate_zone_id(zone_id) { + return; + } + + let viommu = { + let mut viommu_arr = VIOMMU_ARR.lock(); + viommu_arr[zone_id].take() + }; + + let Some(viommu) = viommu else { + warn!("Zone {}'s Virtual IOMMU does not exist.", zone_id); + return; + }; + + // Clean some content stored in memory related to this viommu. + viommu.cleanup_physical_ddt(); + info!("Zone {}'s Virtual IOMMU removed.", zone_id); +} + /// Register viommu mmio handler for target zone. pub(super) fn viommu_mmio_handler_register(zone: &Zone, viommu_base: usize, viommu_size: usize) { zone.write() @@ -292,28 +313,11 @@ struct FaultQueueState { tail: u32, } +/// Device-directory-table shadow state. +/// It affects the device directory table that the zone sees. struct DdtShadowState { tc: Vec, -} - -struct VcpuSuspendGuard { - cpu_set: CpuSet, -} - -impl VcpuSuspendGuard { - fn new(cpu_set: CpuSet) -> Self { - send_event_to_all(cpu_set, 0, IPI_EVENT_VCPU_SUSPEND); - riscv::asm::fence(); - wait_for_other_vcpus_suspend(cpu_set); - Self { cpu_set } - } -} - -impl Drop for VcpuSuspendGuard { - fn drop(&mut self) { - send_event_to_all(self.cpu_set, 0, IPI_EVENT_VCPU_RESUME); - signal_other_vcpus_resume(self.cpu_set); - } + fsc_written: Vec, } impl VirtualIommu { @@ -393,8 +397,19 @@ impl DdtShadowState { fn new() -> Self { Self { tc: vec![0; MAX_VIOMMU_DDT_DEVICES], + fsc_written: vec![false; MAX_VIOMMU_DDT_DEVICES], } } + + fn mark_fsc_written(&mut self, device_id: usize) { + if let Some(written) = self.fsc_written.get_mut(device_id) { + *written = true; + } + } + + fn fsc_needs_cleanup(&self, device_id: usize) -> bool { + self.fsc_written.get(device_id).copied().unwrap_or(false) + } } impl VirtualIommuInner { @@ -540,11 +555,14 @@ impl VirtualIommuInner { // We unmap this page to trigger a page fault when the guest accesses it. let cpu_set = zone_inner.cpu_set(); - let _resume_guard = VcpuSuspendGuard::new(cpu_set); + send_event_to_all(cpu_set, 0, IPI_EVENT_VCPU_SUSPEND); + wait_for_other_vcpus_suspend(cpu_set); let gpm = zone_inner.gpm_mut(); if let Err(err) = gpm.delete(region.start, region.size) { error!("vIOMMU ddtp region delete failed: {:?}", err); + send_event_to_all(cpu_set, 0, IPI_EVENT_VCPU_RESUME); + signal_other_vcpus_resume(cpu_set); return false; } @@ -559,6 +577,8 @@ impl VirtualIommuInner { ); if let Err(err) = gpm.insert(left_region) { error!("vIOMMU ddtp left region insert failed: {:?}", err); + send_event_to_all(cpu_set, 0, IPI_EVENT_VCPU_RESUME); + signal_other_vcpus_resume(cpu_set); return false; } } @@ -573,6 +593,8 @@ impl VirtualIommuInner { ); if let Err(err) = gpm.insert(right_region) { error!("vIOMMU ddtp right region insert failed: {:?}", err); + send_event_to_all(cpu_set, 0, IPI_EVENT_VCPU_RESUME); + signal_other_vcpus_resume(cpu_set); return false; } } @@ -581,6 +603,8 @@ impl VirtualIommuInner { unsafe { riscv_h::asm::hfence_gvma(0, 0) }; // Keep zone_id as 0 for now to preserve current behavior. zone_inner.mmio_region_register(ddt_gpa, VIOMMU_DDT1LVL_SIZE, viommu_ddt_emul_handler, 0); + send_event_to_all(cpu_set, 0, IPI_EVENT_VCPU_RESUME); + signal_other_vcpus_resume(cpu_set); true } @@ -780,7 +804,7 @@ impl VirtualIommuInner { } } - fn handle_fsc_access(&self, ddt_index: usize, value: usize, access: MmioAccessType) -> u64 { + fn handle_fsc_access(&mut self, ddt_index: usize, value: usize, access: MmioAccessType) -> u64 { match access { MmioAccessType::Read => { iommu_read_ddt_field(ddt_index, IommuDdtField::Fsc).unwrap_or(0) @@ -801,9 +825,33 @@ impl VirtualIommuInner { } if !iommu_write_ddt_field(ddt_index, IommuDdtField::Fsc, fsc) { warn!("vIOMMU ddt entry {} fsc write ignored", ddt_index); + } else { + self.ddt.mark_fsc_written(ddt_index); } 0 } } } } + +impl VirtualIommu { + fn cleanup_physical_ddt(&self) { + self.inner.lock().cleanup_physical_ddt(); + } +} + +impl VirtualIommuInner { + fn cleanup_physical_ddt(&mut self) { + for device_id in 1..self.ddt.tc.len() { + if !self.ddt.fsc_needs_cleanup(device_id) { + continue; + } + if iommu_write_ddt_field(device_id, IommuDdtField::Fsc, 0) { + self.ddt.fsc_written[device_id] = false; + info!("vIOMMU cleaned DDT FSC for device {}", device_id); + } else { + warn!("vIOMMU failed to clean DDT FSC for device {}", device_id); + } + } + } +} diff --git a/src/hypercall/mod.rs b/src/hypercall/mod.rs index 3a67f20f..9261df81 100644 --- a/src/hypercall/mod.rs +++ b/src/hypercall/mod.rs @@ -273,6 +273,10 @@ impl<'a> HyperCall<'a> { drop(zone_w); zone.arch_irqchip_reset(); + + // Remove viommu instance related to this zone. + #[cfg(feature = "viommu")] + crate::device::iommu::viommu_remove(zone_id as usize); drop(zone); // Reset zone_id for all devices allocated to this zone