diff --git a/kernel/src/cpu/control_regs.rs b/kernel/src/cpu/control_regs.rs
index ffd6c2461..137b0286d 100644
--- a/kernel/src/cpu/control_regs.rs
+++ b/kernel/src/cpu/control_regs.rs
@@ -34,6 +34,7 @@ pub fn cr4_init() {
 }
 
 bitflags! {
+    #[derive(Debug, Clone, Copy)]
     pub struct CR0Flags: u64 {
         const PE = 1 << 0; // Protection Enabled
         const MP = 1 << 1; // Monitor Coprocessor
@@ -108,6 +109,7 @@ pub fn write_cr3(cr3: PhysAddr) {
 }
 
 bitflags! {
+    #[derive(Debug, Clone, Copy)]
     pub struct CR4Flags: u64 {
         const VME = 1 << 0; // Virtual-8086 Mode Extensions
         const PVI = 1 << 1; // Protected-Mode Virtual Interrupts
diff --git a/kernel/src/cpu/percpu.rs b/kernel/src/cpu/percpu.rs
index 893fe3b4c..31b0ec2f0 100644
--- a/kernel/src/cpu/percpu.rs
+++ b/kernel/src/cpu/percpu.rs
@@ -323,7 +323,12 @@ impl PerCpu {
             tss: Cell::new(X86Tss::new()),
             svsm_vmsa: OnceCell::new(),
             reset_ip: Cell::new(0xffff_fff0),
-            vm_range: VMR::new(SVSM_PERCPU_BASE, SVSM_PERCPU_END, PTEntryFlags::GLOBAL),
+            vm_range: {
+                let mut vmr = VMR::new(SVSM_PERCPU_BASE, SVSM_PERCPU_END, PTEntryFlags::GLOBAL);
+                vmr.set_per_cpu(true);
+                vmr
+            },
+
             vrange_4k: RefCell::new(VirtualRange::new()),
             vrange_2m: RefCell::new(VirtualRange::new()),
             runqueue: RefCell::new(RunQueue::new()),
diff --git a/kernel/src/cpu/tlb.rs b/kernel/src/cpu/tlb.rs
index 501673fdb..11c1a3a03 100644
--- a/kernel/src/cpu/tlb.rs
+++ b/kernel/src/cpu/tlb.rs
@@ -5,6 +5,8 @@
 // Author: Joerg Roedel
 
 use crate::address::{Address, VirtAddr};
+use crate::cpu::control_regs::{read_cr4, write_cr4, CR4Flags};
+
 use core::arch::asm;
 
 const INVLPGB_VALID_VA: u64 = 1u64 << 0;
@@ -50,6 +52,21 @@ pub fn flush_tlb_global_sync() {
     do_tlbsync();
 }
 
+pub fn flush_tlb_global_percpu() {
+    let cr4 = read_cr4();
+    write_cr4(cr4 ^ CR4Flags::PGE);
+    write_cr4(cr4);
+}
+
+pub fn flush_address_percpu(va: VirtAddr) {
+    let va: u64 = va.page_align().bits() as u64;
+    unsafe {
+        asm!("invlpg (%rax)",
+             in("rax") va,
+             options(att_syntax));
+    }
+}
+
 pub fn flush_address(va: VirtAddr) {
     let rax: u64 = (va.page_align().bits() as u64)
         | INVLPGB_VALID_VA
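Aside (not part of the patch): the two functions added to tlb.rs above rely on an architectural guarantee of x86: any write to CR4 that changes the PGE bit invalidates all TLB entries on the executing CPU, including entries marked global, which `invlpg` alone does not reliably remove. XOR-ing PGE into the saved value and then writing the saved value back toggles the bit twice, forcing a full local flush without an INVLPGB broadcast or a TLBSYNC wait. Below is a minimal standalone sketch of the same mechanism, assuming raw CR4 accessors and a hypothetical CR4_PGE constant in place of the kernel's read_cr4/write_cr4/CR4Flags helpers; it must run in kernel context (CPL 0).

    use core::arch::asm;

    const CR4_PGE: u64 = 1 << 7; // Page Global Enable (hypothetical stand-in)

    /// Read CR4 on the executing CPU.
    fn read_cr4_raw() -> u64 {
        let cr4: u64;
        unsafe {
            asm!("mov %cr4, %rax", out("rax") cr4, options(att_syntax));
        }
        cr4
    }

    /// Write CR4; a change to the PGE bit flushes the whole local TLB.
    unsafe fn write_cr4_raw(cr4: u64) {
        asm!("mov %rax, %cr4", in("rax") cr4, options(att_syntax));
    }

    /// Full TLB flush for this CPU only, global entries included.
    fn flush_tlb_all_local() {
        let cr4 = read_cr4_raw();
        unsafe {
            write_cr4_raw(cr4 ^ CR4_PGE); // toggle PGE: flushes everything
            write_cr4_raw(cr4);           // restore the original value
        }
    }

The XOR form works regardless of the initial state of PGE: either write changes the bit, and either change is enough to invalidate the TLB.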
diff --git a/kernel/src/mm/ptguards.rs b/kernel/src/mm/ptguards.rs
index 17f3eb245..a6cd6c94a 100644
--- a/kernel/src/mm/ptguards.rs
+++ b/kernel/src/mm/ptguards.rs
@@ -7,12 +7,12 @@
 use super::pagetable::PTEntryFlags;
 use crate::address::{Address, PhysAddr, VirtAddr};
 use crate::cpu::percpu::this_cpu;
-use crate::cpu::tlb::flush_address_sync;
+use crate::cpu::tlb::flush_address_percpu;
 use crate::error::SvsmError;
 use crate::mm::virtualrange::{
     virt_alloc_range_2m, virt_alloc_range_4k, virt_free_range_2m, virt_free_range_4k,
 };
-use crate::types::{PAGE_SIZE, PAGE_SIZE_2M};
+use crate::types::{PageSize, PAGE_SIZE, PAGE_SIZE_2M};
 use crate::utils::MemoryRegion;
 
@@ -99,13 +99,19 @@ impl PerCPUPageMappingGuard {
 
 impl Drop for PerCPUPageMappingGuard {
     fn drop(&mut self) {
-        if self.huge {
+        let size = if self.huge {
             this_cpu().get_pgtable().unmap_region_2m(self.mapping);
             virt_free_range_2m(self.mapping);
+            PageSize::Huge
         } else {
             this_cpu().get_pgtable().unmap_region_4k(self.mapping);
             virt_free_range_4k(self.mapping);
+            PageSize::Regular
+        };
+        // This iterative flush is acceptable for same-CPU mappings because no
+        // broadcast is involved for each iteration.
+        for page in self.mapping.iter_pages(size) {
+            flush_address_percpu(page);
         }
-        flush_address_sync(self.mapping.start());
     }
 }
diff --git a/kernel/src/mm/vm/range.rs b/kernel/src/mm/vm/range.rs
index c7513f6a5..3a913d33f 100644
--- a/kernel/src/mm/vm/range.rs
+++ b/kernel/src/mm/vm/range.rs
@@ -5,7 +5,7 @@
 // Author: Joerg Roedel
 
 use crate::address::{Address, VirtAddr};
-use crate::cpu::flush_tlb_global_sync;
+use crate::cpu::{flush_tlb_global_percpu, flush_tlb_global_sync};
 use crate::error::SvsmError;
 use crate::locking::RWLock;
 use crate::mm::pagetable::{PTEntryFlags, PageTable, PageTablePart, PageTableRef};
@@ -59,6 +59,10 @@ pub struct VMR {
     /// [`PTEntryFlags`] global to all mappings in this region. This is a
     /// combination of [`PTEntryFlags::GLOBAL`] and [`PTEntryFlags::USER`].
     pt_flags: PTEntryFlags,
+
+    /// Indicates that this [`struct VMR`] is visible only on a single CPU
+    /// and therefore TLB flushes do not require broadcast.
+    per_cpu: bool,
 }
 
 impl VMR {
@@ -82,9 +86,16 @@ impl VMR {
             tree: RWLock::new(RBTree::new(VMMAdapter::new())),
             pgtbl_parts: RWLock::new(Vec::new()),
             pt_flags: flags,
+            per_cpu: false,
         }
     }
 
+    /// Marks a [`struct VMR`] as being associated with only a single CPU
+    /// so that TLB flushes do not require broadcast.
+    pub fn set_per_cpu(&mut self, per_cpu: bool) {
+        self.per_cpu = per_cpu;
+    }
+
     /// Allocated all [`PageTablePart`]s needed to map this region
     ///
     /// # Returns
@@ -425,7 +436,11 @@ impl VMR {
         let mut cursor = tree.find_mut(&addr);
         if let Some(node) = cursor.get() {
             self.unmap_vmm(node);
-            flush_tlb_global_sync();
+            if self.per_cpu {
+                flush_tlb_global_percpu();
+            } else {
+                flush_tlb_global_sync();
+            }
         }
         cursor.remove().ok_or(SvsmError::Mem)
     }
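Two notes on the mm/ changes above. First, PerCPUPageMappingGuard::drop() now records whether the mapping used 4 KiB or 2 MiB pages and flushes each page individually with a local invlpg via flush_address_percpu(), instead of issuing one broadcast flush_address_sync(); this is sound because the guard's virtual range lives in per-CPU address space, so no other CPU can hold a stale translation. Second, VMR::remove() picks the local flush only when the range was explicitly marked with set_per_cpu(true), as PerCpu now does for its vm_range.

For illustration, a self-contained model of the drop-time loop, assuming simplified stand-ins for the kernel's MemoryRegion, PageSize, and flush primitive (hosted Rust; println! takes the place of flush_address_percpu()):

    const PAGE_SIZE: usize = 0x1000;       // 4 KiB
    const PAGE_SIZE_2M: usize = 0x20_0000; // 2 MiB

    #[derive(Clone, Copy)]
    enum PageSize {
        Regular, // 4 KiB mappings
        Huge,    // 2 MiB mappings
    }

    struct MemoryRegion {
        start: usize,
        end: usize,
    }

    impl MemoryRegion {
        /// Yield the start address of every page in the region, stepping
        /// by the granularity the mapping was actually created with.
        fn iter_pages(&self, size: PageSize) -> impl Iterator<Item = usize> {
            let step = match size {
                PageSize::Regular => PAGE_SIZE,
                PageSize::Huge => PAGE_SIZE_2M,
            };
            (self.start..self.end).step_by(step)
        }
    }

    fn main() {
        // A four-page 4 KiB region: drop would issue four local flushes,
        // none of which stalls waiting for remote TLBSYNC acknowledgement.
        let region = MemoryRegion { start: 0xffff_0000, end: 0xffff_4000 };
        for page in region.iter_pages(PageSize::Regular) {
            println!("invlpg {:#x}", page); // stand-in for flush_address_percpu()
        }
    }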