Skip to content

Commit

Permalink
cpu/tlb: do not broadcast per-cpu TLB flushes
Browse files Browse the repository at this point in the history
VM ranges that are specific to a single CPU do not require TLB
invalidations to be broadcast to multiple processors.  This is
especially important during the early boot phase when no other
processors are online and when the infrastructure required to broadcast
TLB invalidations may not yet be fully initialized.  The same is true
for temporary mappings established in a per-CPU address range.

Signed-off-by: Jon Lange <[email protected]>
  • Loading branch information
msft-jlange committed Jul 24, 2024
1 parent ca6fea7 commit e1c168b
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 7 deletions.
2 changes: 2 additions & 0 deletions kernel/src/cpu/control_regs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ pub fn cr4_init() {
}

bitflags! {
#[derive(Debug, Clone, Copy)]
pub struct CR0Flags: u64 {
const PE = 1 << 0; // Protection Enabled
const MP = 1 << 1; // Monitor Coprocessor
Expand Down Expand Up @@ -108,6 +109,7 @@ pub fn write_cr3(cr3: PhysAddr) {
}

bitflags! {
#[derive(Debug, Clone, Copy)]
pub struct CR4Flags: u64 {
const VME = 1 << 0; // Virtual-8086 Mode Extensions
const PVI = 1 << 1; // Protected-Mode Virtual Interrupts
Expand Down
7 changes: 6 additions & 1 deletion kernel/src/cpu/percpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,12 @@ impl PerCpu {
tss: Cell::new(X86Tss::new()),
svsm_vmsa: OnceCell::new(),
reset_ip: Cell::new(0xffff_fff0),
vm_range: VMR::new(SVSM_PERCPU_BASE, SVSM_PERCPU_END, PTEntryFlags::GLOBAL),
vm_range: {
let mut vmr = VMR::new(SVSM_PERCPU_BASE, SVSM_PERCPU_END, PTEntryFlags::GLOBAL);
vmr.set_per_cpu(true);
vmr
},

vrange_4k: RefCell::new(VirtualRange::new()),
vrange_2m: RefCell::new(VirtualRange::new()),
runqueue: RefCell::new(RunQueue::new()),
Expand Down
17 changes: 17 additions & 0 deletions kernel/src/cpu/tlb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
// Author: Joerg Roedel <[email protected]>

use crate::address::{Address, VirtAddr};
use crate::cpu::control_regs::{read_cr4, write_cr4, CR4Flags};

use core::arch::asm;

const INVLPGB_VALID_VA: u64 = 1u64 << 0;
Expand Down Expand Up @@ -50,6 +52,21 @@ pub fn flush_tlb_global_sync() {
do_tlbsync();
}

/// Flushes the whole TLB, global entries included, on the calling CPU only.
///
/// Nothing is broadcast to other processors, so this is only suitable for
/// mappings that are private to the current CPU (for example a per-CPU
/// address range).
///
/// The flush is performed by writing CR4 with the `PGE` bit inverted and then
/// writing the original value back. On x86-64 a CR4 write that changes
/// `CR4.PGE` invalidates all TLB entries of the executing processor.
pub fn flush_tlb_global_percpu() {
    let original = read_cr4();
    // The first write flips PGE; that change is what triggers the flush.
    let toggled = original ^ CR4Flags::PGE;
    write_cr4(toggled);
    // The second write restores the CR4 value that was in effect on entry.
    write_cr4(original);
}

/// Invalidates the TLB entry covering `va` on the calling CPU only.
///
/// The address is page-aligned with `page_align()` and handed to the
/// `invlpg` instruction. No invalidation is broadcast to other processors, so
/// this must only be used for mappings that are private to the current CPU.
///
/// # Arguments
///
/// * `va` - Virtual address inside the page whose translation is dropped.
pub fn flush_address_percpu(va: VirtAddr) {
    let page_va = va.page_align().bits() as u64;
    // SAFETY: `invlpg` only discards cached translations for the given
    // address; it does not read or write the memory that address names.
    // NOTE(review): the instruction is privileged; assumes this always runs
    // at CPL0 as part of the kernel.
    unsafe {
        asm!("invlpg (%rax)", in("rax") page_va, options(att_syntax));
    }
}

pub fn flush_address(va: VirtAddr) {
let rax: u64 = (va.page_align().bits() as u64)
| INVLPGB_VALID_VA
Expand Down
14 changes: 10 additions & 4 deletions kernel/src/mm/ptguards.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
use super::pagetable::PTEntryFlags;
use crate::address::{Address, PhysAddr, VirtAddr};
use crate::cpu::percpu::this_cpu;
use crate::cpu::tlb::flush_address_sync;
use crate::cpu::tlb::flush_address_percpu;
use crate::error::SvsmError;
use crate::mm::virtualrange::{
virt_alloc_range_2m, virt_alloc_range_4k, virt_free_range_2m, virt_free_range_4k,
};
use crate::types::{PAGE_SIZE, PAGE_SIZE_2M};
use crate::types::{PageSize, PAGE_SIZE, PAGE_SIZE_2M};

use crate::utils::MemoryRegion;

Expand Down Expand Up @@ -99,13 +99,19 @@ impl PerCPUPageMappingGuard {

impl Drop for PerCPUPageMappingGuard {
fn drop(&mut self) {
if self.huge {
let size = if self.huge {
this_cpu().get_pgtable().unmap_region_2m(self.mapping);
virt_free_range_2m(self.mapping);
PageSize::Huge
} else {
this_cpu().get_pgtable().unmap_region_4k(self.mapping);
virt_free_range_4k(self.mapping);
PageSize::Regular
};
// This iterative flush is acceptable for same-CPU mappings because no
// broadcast is involved for each iteration.
for page in self.mapping.iter_pages(size) {
flush_address_percpu(page);
}
flush_address_sync(self.mapping.start());
}
}
19 changes: 17 additions & 2 deletions kernel/src/mm/vm/range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
// Author: Joerg Roedel <[email protected]>

use crate::address::{Address, VirtAddr};
use crate::cpu::flush_tlb_global_sync;
use crate::cpu::{flush_tlb_global_percpu, flush_tlb_global_sync};
use crate::error::SvsmError;
use crate::locking::RWLock;
use crate::mm::pagetable::{PTEntryFlags, PageTable, PageTablePart, PageTableRef};
Expand Down Expand Up @@ -59,6 +59,10 @@ pub struct VMR {
/// [`PTEntryFlags`] global to all mappings in this region. This is a
/// combination of [`PTEntryFlags::GLOBAL`] and [`PTEntryFlags::USER`].
pt_flags: PTEntryFlags,

/// Indicates that this [`VMR`] is visible only on a single CPU
/// and therefore TLB flushes do not require broadcast.
per_cpu: bool,
}

impl VMR {
Expand All @@ -82,9 +86,16 @@ impl VMR {
tree: RWLock::new(RBTree::new(VMMAdapter::new())),
pgtbl_parts: RWLock::new(Vec::new()),
pt_flags: flags,
per_cpu: false,
}
}

/// Marks this [`VMR`] as being associated with only a single CPU
/// so that TLB flushes do not require broadcast.
///
/// # Arguments
///
/// * `per_cpu` - `true` if the range is only ever visible on one CPU,
///   `false` if TLB flushes for it must reach other processors as well.
pub fn set_per_cpu(&mut self, per_cpu: bool) {
// Only the flag is recorded here; no TLB flush happens in this call.
self.per_cpu = per_cpu;
}

/// Allocates all [`PageTablePart`]s needed to map this region
///
/// # Returns
Expand Down Expand Up @@ -425,7 +436,11 @@ impl VMR {
let mut cursor = tree.find_mut(&addr);
if let Some(node) = cursor.get() {
self.unmap_vmm(node);
flush_tlb_global_sync();
if self.per_cpu {
flush_tlb_global_percpu();
} else {
flush_tlb_global_sync();
}
}
cursor.remove().ok_or(SvsmError::Mem)
}
Expand Down

0 comments on commit e1c168b

Please sign in to comment.