diff --git a/kernel/src/cpu/idt/common.rs b/kernel/src/cpu/idt/common.rs
index a6cc1d655..3a1754060 100644
--- a/kernel/src/cpu/idt/common.rs
+++ b/kernel/src/cpu/idt/common.rs
@@ -401,3 +401,21 @@ global_asm!(
     "#,
     options(att_syntax)
 );
+
+#[repr(u32)]
+#[derive(Debug, Clone, Copy)]
+pub enum IdtEventType {
+    Unknown = 0,
+    External,
+    Software,
+}
+
+impl IdtEventType {
+    pub fn is_external_interrupt(&self, vector: usize) -> bool {
+        match self {
+            Self::External => true,
+            Self::Software => false,
+            Self::Unknown => SVSM_PLATFORM.as_dyn_ref().is_external_interrupt(vector),
+        }
+    }
+}
diff --git a/kernel/src/cpu/idt/entry.S b/kernel/src/cpu/idt/entry.S
index 5888618e9..cfd04c37f 100644
--- a/kernel/src/cpu/idt/entry.S
+++ b/kernel/src/cpu/idt/entry.S
@@ -42,6 +42,7 @@ asm_entry_\name:
     push_regs
     movl $\vector, %esi
     movq %rsp, %rdi
+    xorl %edx, %edx
     call ex_handler_\handler
     jmp default_return
 .endm
diff --git a/kernel/src/cpu/idt/svsm.rs b/kernel/src/cpu/idt/svsm.rs
index 2d23eeace..bcce012bb 100644
--- a/kernel/src/cpu/idt/svsm.rs
+++ b/kernel/src/cpu/idt/svsm.rs
@@ -10,10 +10,10 @@ use super::super::percpu::{current_task, this_cpu};
 use super::super::tss::IST_DF;
 use super::super::vc::handle_vc_exception;
 use super::common::{
-    idt_mut, user_mode, IdtEntry, PageFaultError, AC_VECTOR, BP_VECTOR, BR_VECTOR, CP_VECTOR,
-    DB_VECTOR, DE_VECTOR, DF_VECTOR, GP_VECTOR, HV_VECTOR, INT_INJ_VECTOR, MCE_VECTOR, MF_VECTOR,
-    NMI_VECTOR, NM_VECTOR, NP_VECTOR, OF_VECTOR, PF_VECTOR, SS_VECTOR, SX_VECTOR, TS_VECTOR,
-    UD_VECTOR, VC_VECTOR, XF_VECTOR,
+    idt_mut, user_mode, IdtEntry, IdtEventType, PageFaultError, AC_VECTOR, BP_VECTOR, BR_VECTOR,
+    CP_VECTOR, DB_VECTOR, DE_VECTOR, DF_VECTOR, GP_VECTOR, HV_VECTOR, INT_INJ_VECTOR, MCE_VECTOR,
+    MF_VECTOR, NMI_VECTOR, NM_VECTOR, NP_VECTOR, OF_VECTOR, PF_VECTOR, SS_VECTOR, SX_VECTOR,
+    TS_VECTOR, UD_VECTOR, VC_VECTOR, XF_VECTOR,
 };
 use crate::address::VirtAddr;
 use crate::cpu::registers::RFlags;
@@ -227,7 +227,16 @@ extern "C" fn ex_handler_vmm_communication(ctxt: &mut X86ExceptionContext, vecto
 
 // System Call SoftIRQ handler
 #[no_mangle]
-extern "C" fn ex_handler_system_call(ctxt: &mut X86ExceptionContext) {
+extern "C" fn ex_handler_system_call(
+    ctxt: &mut X86ExceptionContext,
+    vector: usize,
+    event_type: IdtEventType,
+) {
+    // Ensure that this vector was not invoked as a hardware interrupt vector.
+    if event_type.is_external_interrupt(vector) {
+        panic!("Syscall handler invoked as external interrupt!");
+    }
+
     if !user_mode(ctxt) {
         panic!("Syscall handler called from kernel mode!");
     }
@@ -275,4 +284,4 @@ global_asm!(
     include_str!("entry.S"),
     IF = const RFlags::IF.bits(),
     options(att_syntax)
-);
\ No newline at end of file
+);
diff --git a/kernel/src/cpu/irq_state.rs b/kernel/src/cpu/irq_state.rs
index 16e7beb52..429f99039 100644
--- a/kernel/src/cpu/irq_state.rs
+++ b/kernel/src/cpu/irq_state.rs
@@ -152,6 +152,19 @@ impl IrqState {
     pub fn count(&self) -> isize {
         self.count.load(Ordering::Relaxed)
     }
+
+    /// Changes whether interrupts will be enabled when the nesting count
+    /// drops to zero.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure that the current nesting count is non-zero,
+    /// and must ensure that the specified value is appropriate for the
+    /// current environment.
+    pub unsafe fn set_restore_state(&self, enabled: bool) {
+        assert!(self.count.load(Ordering::Relaxed) != 0);
+        self.state.store(enabled, Ordering::Relaxed);
+    }
 }
 
 impl Drop for IrqState {
@@ -212,21 +225,24 @@ mod tests {
     #[test]
     #[cfg_attr(not(test_in_svsm), ignore = "Can only be run inside guest")]
     fn irq_enable_disable() {
-        assert!(irqs_disabled());
         unsafe {
+            let was_enabled = irqs_enabled();
             raw_irqs_enable();
             assert!(irqs_enabled());
             raw_irqs_disable();
             assert!(irqs_disabled());
+            if was_enabled {
+                raw_irqs_enable();
+            }
         }
     }
 
     #[test]
     #[cfg_attr(not(test_in_svsm), ignore = "Can only be run inside guest")]
     fn irq_state() {
-        assert!(irqs_disabled());
         unsafe {
             let state = IrqState::new();
+            let was_enabled = irqs_enabled();
             raw_irqs_enable();
             state.disable();
             assert!(irqs_disabled());
@@ -235,22 +251,26 @@ mod tests {
             assert!(irqs_disabled());
             state.enable();
             assert!(irqs_enabled());
-            raw_irqs_disable();
+            if !was_enabled {
+                raw_irqs_disable();
+            }
         }
     }
 
     #[test]
     #[cfg_attr(not(test_in_svsm), ignore = "Can only be run inside guest")]
     fn irq_guard_test() {
-        assert!(irqs_disabled());
         unsafe {
+            let was_enabled = irqs_enabled();
             raw_irqs_enable();
             assert!(irqs_enabled());
             let g1 = IrqGuard::new();
             assert!(irqs_disabled());
             drop(g1);
             assert!(irqs_enabled());
-            raw_irqs_disable();
+            if !was_enabled {
+                raw_irqs_disable();
+            }
         }
     }
 }
diff --git a/kernel/src/cpu/percpu.rs b/kernel/src/cpu/percpu.rs
index 84b105db0..35510f58f 100644
--- a/kernel/src/cpu/percpu.rs
+++ b/kernel/src/cpu/percpu.rs
@@ -825,6 +825,17 @@ impl PerCpu {
     }
 
     pub fn schedule_init(&self) -> TaskPointer {
+        // If the platform permits the use of interrupts, then ensure that
+        // interrupts will be enabled on the current CPU when leaving the
+        // scheduler environment. This is done after disabling interrupts
+        // for scheduler initialization so that the first interrupt that can
+        // be received will always observe that there is a current task and
+        // not the boot thread.
+        if SVSM_PLATFORM.as_dyn_ref().use_interrupts() {
+            unsafe {
+                self.irq_state.set_restore_state(true);
+            }
+        }
         let task = self.runqueue.lock_write().schedule_init();
         self.current_stack.set(task.stack_bounds());
         task
diff --git a/kernel/src/locking/rwlock.rs b/kernel/src/locking/rwlock.rs
index 9d81b7d81..224a6fc0f 100644
--- a/kernel/src/locking/rwlock.rs
+++ b/kernel/src/locking/rwlock.rs
@@ -330,11 +330,11 @@ mod tests {
     #[cfg_attr(not(test_in_svsm), ignore = "Can only be run inside guest")]
     fn rw_lock_irq_unsafe() {
         use crate::cpu::irq_state::{raw_irqs_disable, raw_irqs_enable};
-        use crate::cpu::{irqs_disabled, irqs_enabled};
+        use crate::cpu::irqs_enabled;
         use crate::locking::*;
 
-        assert!(irqs_disabled());
         unsafe {
+            let was_enabled = irqs_enabled();
             raw_irqs_enable();
 
             let lock = RWLock::new(0);
@@ -354,7 +354,9 @@ mod tests {
             // IRQs must still be enabled
             assert!(irqs_enabled());
 
-            raw_irqs_disable();
+            if !was_enabled {
+                raw_irqs_disable();
+            }
         }
     }
 
@@ -365,8 +367,8 @@ mod tests {
         use crate::cpu::{irqs_disabled, irqs_enabled};
         use crate::locking::*;
 
-        assert!(irqs_disabled());
         unsafe {
+            let was_enabled = irqs_enabled();
             raw_irqs_enable();
 
             let lock = RWLockIrqSafe::new(0);
@@ -388,7 +390,9 @@ mod tests {
             // IRQs must still be enabled
             assert!(irqs_enabled());
 
-            raw_irqs_disable();
+            if !was_enabled {
+                raw_irqs_disable();
+            }
         }
     }
 }
diff --git a/kernel/src/locking/spinlock.rs b/kernel/src/locking/spinlock.rs
index f6b969ba4..1a91338ee 100644
--- a/kernel/src/locking/spinlock.rs
+++ b/kernel/src/locking/spinlock.rs
@@ -224,9 +224,8 @@ mod tests {
     #[test]
     #[cfg_attr(not(test_in_svsm), ignore = "Can only be run inside guest")]
     fn spin_lock_irq_unsafe() {
-        assert!(irqs_disabled());
-
         unsafe {
+            let was_enabled = irqs_enabled();
             raw_irqs_enable();
 
             let spin_lock = SpinLock::new(0);
@@ -235,16 +234,17 @@ mod tests {
             drop(guard);
             assert!(irqs_enabled());
 
-            raw_irqs_disable();
+            if !was_enabled {
+                raw_irqs_disable();
+            }
         }
     }
 
     #[test]
     #[cfg_attr(not(test_in_svsm), ignore = "Can only be run inside guest")]
     fn spin_lock_irq_safe() {
-        assert!(irqs_disabled());
-
         unsafe {
+            let was_enabled = irqs_enabled();
             raw_irqs_enable();
 
             let spin_lock = SpinLockIrqSafe::new(0);
@@ -253,16 +253,17 @@ mod tests {
             drop(guard);
             assert!(irqs_enabled());
 
-            raw_irqs_disable();
+            if !was_enabled {
+                raw_irqs_disable();
+            }
         }
     }
 
     #[test]
     #[cfg_attr(not(test_in_svsm), ignore = "Can only be run inside guest")]
     fn spin_trylock_irq_safe() {
-        assert!(irqs_disabled());
-
         unsafe {
+            let was_enabled = irqs_enabled();
             raw_irqs_enable();
 
             let spin_lock = SpinLockIrqSafe::new(0);
@@ -276,8 +277,10 @@ mod tests {
             drop(g1);
             assert!(irqs_enabled());
 
-            // Leave with IRQs disabled, as test was entered.
-            raw_irqs_disable();
+            // Leave with IRQs configured as test was entered.
+            if !was_enabled {
+                raw_irqs_disable();
+            }
         }
     }
 }
diff --git a/kernel/src/platform/mod.rs b/kernel/src/platform/mod.rs
index 1a6cd68c4..0d7eaad81 100644
--- a/kernel/src/platform/mod.rs
+++ b/kernel/src/platform/mod.rs
@@ -115,12 +115,19 @@ pub trait SvsmPlatform {
     /// Queries the state of APIC registration on this system.
     fn query_apic_registration_state(&self) -> bool;
 
+    /// Determines whether the platform supports interrupts to the SVSM.
+    fn use_interrupts(&self) -> bool;
+
     /// Signal an IRQ on one or more CPUs.
     fn post_irq(&self, icr: u64) -> Result<(), SvsmError>;
 
     /// Perform an EOI of the current interrupt.
     fn eoi(&self);
 
+    /// Determines whether a given interrupt vector was invoked as an external
+    /// interrupt.
+    fn is_external_interrupt(&self, vector: usize) -> bool;
+
     /// Start an additional processor.
     fn start_cpu(&self, cpu: &PerCpu, start_rip: u64) -> Result<(), SvsmError>;
 }
diff --git a/kernel/src/platform/native.rs b/kernel/src/platform/native.rs
index 7d7aa5250..17f3d4421 100644
--- a/kernel/src/platform/native.rs
+++ b/kernel/src/platform/native.rs
@@ -128,6 +128,10 @@ impl SvsmPlatform for NativePlatform {
         false
     }
 
+    fn use_interrupts(&self) -> bool {
+        true
+    }
+
     fn post_irq(&self, icr: u64) -> Result<(), SvsmError> {
         write_msr(APIC_MSR_ICR, icr);
         Ok(())
@@ -137,6 +141,13 @@ impl SvsmPlatform for NativePlatform {
         todo!();
     }
 
+    fn is_external_interrupt(&self, _vector: usize) -> bool {
+        // For a native platform, the hypervisor is fully trusted with all
+        // event delivery, so all events are assumed not to be external
+        // interrupts.
+        false
+    }
+
     fn start_cpu(&self, _cpu: &PerCpu, _start_rip: u64) -> Result<(), SvsmError> {
         todo!();
     }
diff --git a/kernel/src/platform/snp.rs b/kernel/src/platform/snp.rs
index 85bbe6442..ad0377649 100644
--- a/kernel/src/platform/snp.rs
+++ b/kernel/src/platform/snp.rs
@@ -19,7 +19,7 @@ use crate::sev::hv_doorbell::current_hv_doorbell;
 use crate::sev::msr_protocol::{
     hypervisor_ghcb_features, request_termination_msr, verify_ghcb_version, GHCBHvFeatures,
 };
-use crate::sev::status::vtom_enabled;
+use crate::sev::status::{sev_restricted_injection, vtom_enabled};
 use crate::sev::{
     init_hypervisor_ghcb_features, pvalidate_range, sev_status_init, sev_status_verify, PvalidateOp,
 };
@@ -69,11 +69,15 @@ impl From for PvalidateOp {
 }
 
 #[derive(Clone, Copy, Debug)]
-pub struct SnpPlatform {}
+pub struct SnpPlatform {
+    can_use_interrupts: bool,
+}
 
 impl SnpPlatform {
     pub fn new() -> Self {
-        Self {}
+        Self {
+            can_use_interrupts: false,
+        }
     }
 }
 
@@ -87,6 +91,14 @@ impl SvsmPlatform for SnpPlatform {
     fn env_setup(&mut self, _debug_serial_port: u16, vtom: usize) -> Result<(), SvsmError> {
         sev_status_init();
         VTOM.init(&vtom).map_err(|_| SvsmError::PlatformInit)?;
+
+        // Now that SEV status is initialized, determine whether this platform
+        // supports the use of SVSM interrupts. SVSM interrupts are supported
+        // if this system uses restricted injection.
+        if sev_restricted_injection() {
+            self.can_use_interrupts = true;
+        }
+
         Ok(())
     }
 
@@ -255,6 +267,10 @@ impl SvsmPlatform for SnpPlatform {
         APIC_EMULATION_REG_COUNT.load(Ordering::Relaxed) > 0
     }
 
+    fn use_interrupts(&self) -> bool {
+        self.can_use_interrupts
+    }
+
     fn post_irq(&self, icr: u64) -> Result<(), SvsmError> {
         current_ghcb().hv_ipi(icr)?;
         Ok(())
@@ -270,6 +286,15 @@ impl SvsmPlatform for SnpPlatform {
         }
     }
 
+    fn is_external_interrupt(&self, _vector: usize) -> bool {
+        // When restricted injection is active, the event disposition is
+        // already known to the caller and thus need not be examined. When
+        // restricted injection is not active, the hypervisor must be trusted
+        // with all event delivery, so all events are assumed not to be
+        // external interrupts.
+        false
+    }
+
     fn start_cpu(&self, cpu: &PerCpu, start_rip: u64) -> Result<(), SvsmError> {
         let (vmsa_pa, sev_features) = cpu.alloc_svsm_vmsa(*VTOM as u64, start_rip)?;
 
diff --git a/kernel/src/platform/tdp.rs b/kernel/src/platform/tdp.rs
index bb210c14b..d0e9c0873 100644
--- a/kernel/src/platform/tdp.rs
+++ b/kernel/src/platform/tdp.rs
@@ -144,12 +144,23 @@ impl SvsmPlatform for TdpPlatform {
         false
     }
 
+    fn use_interrupts(&self) -> bool {
+        true
+    }
+
     fn post_irq(&self, _icr: u64) -> Result<(), SvsmError> {
         Err(SvsmError::Tdx)
     }
 
     fn eoi(&self) {}
 
+    fn is_external_interrupt(&self, _vector: usize) -> bool {
+        // Examine the APIC ISR to determine whether this interrupt vector is
+        // active. If so, it is assumed to be an external interrupt.
+        // TODO - add code to read the APIC ISR.
+        todo!();
+    }
+
     fn start_cpu(&self, _cpu: &PerCpu, _start_rip: u64) -> Result<(), SvsmError> {
         todo!();
     }
diff --git a/kernel/src/sev/ghcb.rs b/kernel/src/sev/ghcb.rs
index 96f33841c..26c2ddfb6 100644
--- a/kernel/src/sev/ghcb.rs
+++ b/kernel/src/sev/ghcb.rs
@@ -7,7 +7,7 @@
 use crate::address::{Address, PhysAddr, VirtAddr};
 use crate::cpu::msr::{write_msr, SEV_GHCB};
 use crate::cpu::percpu::this_cpu;
-use crate::cpu::{flush_tlb_global_sync, X86GeneralRegs};
+use crate::cpu::{flush_tlb_global_sync, IrqGuard, X86GeneralRegs};
 use crate::error::SvsmError;
 use crate::mm::validate::{
     valid_bitmap_clear_valid_4k, valid_bitmap_set_valid_4k, valid_bitmap_valid_addr,
@@ -377,8 +377,14 @@ impl GHCB {
         let ghcb_address = VirtAddr::from(self as *const GHCB);
         let ghcb_pa = u64::from(virt_to_phys(ghcb_address));
 
+        // Disable interrupts between writing the MSR and making the GHCB call
+        // to prevent reentrant use of the GHCB MSR.
+        let guard = IrqGuard::new();
         write_msr(SEV_GHCB, ghcb_pa);
-        raw_vmgexit();
+        unsafe {
+            raw_vmgexit();
+        }
+        drop(guard);
 
         let sw_exit_info_1 = self.get_exit_info_1_valid()?;
         if sw_exit_info_1 != 0 {
diff --git a/kernel/src/sev/msr_protocol.rs b/kernel/src/sev/msr_protocol.rs
index 8f8976738..dcb59c1e6 100644
--- a/kernel/src/sev/msr_protocol.rs
+++ b/kernel/src/sev/msr_protocol.rs
@@ -5,7 +5,9 @@
 // Author: Joerg Roedel
 
 use crate::address::{Address, PhysAddr};
+use crate::cpu::irq_state::raw_irqs_disable;
 use crate::cpu::msr::{read_msr, write_msr, SEV_GHCB};
+use crate::cpu::{irqs_enabled, IrqGuard};
 use crate::error::SvsmError;
 use crate::utils::halt;
 use crate::utils::immut_after_init::ImmutAfterInitCell;
@@ -71,9 +73,15 @@ static GHCB_HV_FEATURES: ImmutAfterInitCell<GHCBHvFeatures> = ImmutAfterInitCell
 
 /// Check that we support the hypervisor's advertised GHCB versions.
 pub fn verify_ghcb_version() {
+    // This function is normally only called early during initialization before
+    // interrupts have been enabled, and before interrupt guards can safely be
+    // used.
+    assert!(!irqs_enabled());
     // Request SEV information.
     write_msr(SEV_GHCB, GHCBMsr::SEV_INFO_REQ);
-    raw_vmgexit();
+    unsafe {
+        raw_vmgexit();
+    }
     let sev_info = read_msr(SEV_GHCB);
 
     // Parse the results.
@@ -100,9 +108,13 @@ pub fn hypervisor_ghcb_features() -> GHCBHvFeatures {
 }
 
 pub fn init_hypervisor_ghcb_features() -> Result<(), GhcbMsrError> {
+    let guard = IrqGuard::new();
     write_msr(SEV_GHCB, GHCBMsr::SNP_HV_FEATURES_REQ);
-    raw_vmgexit();
+    unsafe {
+        raw_vmgexit();
+    }
     let result = read_msr(SEV_GHCB);
+    drop(guard);
 
     if (result & 0xFFF) == GHCBMsr::SNP_HV_FEATURES_RESP {
         let features = GHCBHvFeatures::from_bits_truncate(result >> 12);
@@ -134,9 +146,13 @@ pub fn register_ghcb_gpa_msr(addr: PhysAddr) -> Result<(), GhcbMsrError> {
     let mut info = addr.bits() as u64;
 
     info |= GHCBMsr::SNP_REG_GHCB_GPA_REQ;
+    let guard = IrqGuard::new();
     write_msr(SEV_GHCB, info);
-    raw_vmgexit();
+    unsafe {
+        raw_vmgexit();
+    }
     info = read_msr(SEV_GHCB);
+    drop(guard);
 
     if (info & 0xfff) != GHCBMsr::SNP_REG_GHCB_GPA_RESP {
         return Err(GhcbMsrError::InfoMismatch);
@@ -159,9 +175,13 @@ fn set_page_valid_status_msr(addr: PhysAddr, valid: bool) -> Result<(), GhcbMsrE
     }
 
     info |= GHCBMsr::SNP_STATE_CHANGE_REQ;
+    let guard = IrqGuard::new();
     write_msr(SEV_GHCB, info);
-    raw_vmgexit();
+    unsafe {
+        raw_vmgexit();
+    }
     let response = read_msr(SEV_GHCB);
+    drop(guard);
 
     if (response & 0xfff) != GHCBMsr::SNP_STATE_CHANGE_RESP {
         return Err(GhcbMsrError::InfoMismatch);
@@ -185,8 +205,16 @@ pub fn invalidate_page_msr(addr: PhysAddr) -> Result<(), GhcbMsrError> {
 pub fn request_termination_msr() -> ! {
     let info: u64 = GHCBMsr::TERM_REQ;
 
-    write_msr(SEV_GHCB, info);
-    raw_vmgexit();
+    // Safety
+    //
+    // Since this processor is destined for a fatal termination, there is
+    // no reason to preserve interrupt state. Interrupts can be disabled
+    // outright prior to shutdown.
+    unsafe {
+        raw_irqs_disable();
+        write_msr(SEV_GHCB, info);
+        raw_vmgexit();
+    }
     loop {
         halt();
     }
diff --git a/kernel/src/sev/status.rs b/kernel/src/sev/status.rs
index beab8c3ab..18c07eead 100644
--- a/kernel/src/sev/status.rs
+++ b/kernel/src/sev/status.rs
@@ -158,6 +158,10 @@ pub fn vtom_enabled() -> bool {
     sev_flags().contains(SEVStatusFlags::VTOM)
 }
 
+pub fn sev_restricted_injection() -> bool {
+    sev_flags().contains(SEVStatusFlags::REST_INJ)
+}
+
 pub fn sev_status_verify() {
     let required = SEVStatusFlags::SEV | SEVStatusFlags::SEV_ES | SEVStatusFlags::SEV_SNP;
     let supported = SEVStatusFlags::DBGSWP
diff --git a/kernel/src/sev/utils.rs b/kernel/src/sev/utils.rs
index df69bab4e..8dbeb875f 100644
--- a/kernel/src/sev/utils.rs
+++ b/kernel/src/sev/utils.rs
@@ -164,10 +164,13 @@ pub fn get_dr7() -> u64 {
     out
 }
 
-pub fn raw_vmgexit() {
-    unsafe {
-        asm!("rep; vmmcall", options(att_syntax));
-    }
+/// # Safety
+/// VMGEXIT operations generally need to be performed with interrupts disabled
+/// to ensure that an interrupt cannot cause the GHCB MSR to change prior to
+/// exiting to the host. It is the caller's responsibility to ensure that
+/// interrupt handling is configured correctly for the attempted operation.
+pub unsafe fn raw_vmgexit() {
+    asm!("rep; vmmcall", options(att_syntax));
 }
 
 bitflags::bitflags! {