Skip to content

Commit

Permalink
Merge pull request #417 from msft-jlange/tlb_flush
Browse files Browse the repository at this point in the history
cpu/tlb: do not broadcast per-cpu TLB flushes
  • Loading branch information
00xc authored Jul 25, 2024
2 parents 33a191e + e1c168b commit fcb76e5
Show file tree
Hide file tree
Showing 7 changed files with 63 additions and 21 deletions.
10 changes: 7 additions & 3 deletions kernel/src/cpu/control_regs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,17 @@ pub fn cr4_init() {

cr4.insert(CR4Flags::PSE); // Enable Page Size Extensions

if cpu_has_pge() {
cr4.insert(CR4Flags::PGE); // Enable Global Pages
}
// All processors that are capable of virtualization will support global
// page table entries, so there is no reason to support any processor that
// does not enumerate PGE capability.
assert!(cpu_has_pge(), "CPU does not support PGE");

cr4.insert(CR4Flags::PGE); // Enable Global Pages
write_cr4(cr4);
}

bitflags! {
#[derive(Debug, Clone, Copy)]
pub struct CR0Flags: u64 {
const PE = 1 << 0; // Protection Enabled
const MP = 1 << 1; // Monitor Coprocessor
Expand Down Expand Up @@ -106,6 +109,7 @@ pub fn write_cr3(cr3: PhysAddr) {
}

bitflags! {
#[derive(Debug, Clone, Copy)]
pub struct CR4Flags: u64 {
const VME = 1 << 0; // Virtual-8086 Mode Extensions
const PVI = 1 << 1; // Protected-Mode Virtual Interrupts
Expand Down
8 changes: 5 additions & 3 deletions kernel/src/cpu/efer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,11 @@ pub fn write_efer(efer: EFERFlags) {
/// Initializes the EFER MSR for the SVSM environment.
///
/// # Panics
///
/// Panics if the processor does not enumerate NX (no-execute) capability.
pub fn efer_init() {
    let mut efer = read_efer();

    // All processors that are capable of virtualization will support
    // no-execute page table entries, so there is no reason to support any
    // processor that does not enumerate NX capability.
    assert!(cpu_has_nx(), "CPU does not support NX");

    // NX support is guaranteed by the assert above, so enable it
    // unconditionally.
    efer.insert(EFERFlags::NXE);
    write_efer(efer);
}
7 changes: 6 additions & 1 deletion kernel/src/cpu/percpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,12 @@ impl PerCpu {
tss: Cell::new(X86Tss::new()),
svsm_vmsa: OnceCell::new(),
reset_ip: Cell::new(0xffff_fff0),
vm_range: VMR::new(SVSM_PERCPU_BASE, SVSM_PERCPU_END, PTEntryFlags::GLOBAL),
vm_range: {
let mut vmr = VMR::new(SVSM_PERCPU_BASE, SVSM_PERCPU_END, PTEntryFlags::GLOBAL);
vmr.set_per_cpu(true);
vmr
},

vrange_4k: RefCell::new(VirtualRange::new()),
vrange_2m: RefCell::new(VirtualRange::new()),
runqueue: RefCell::new(RunQueue::new()),
Expand Down
17 changes: 17 additions & 0 deletions kernel/src/cpu/tlb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
// Author: Joerg Roedel <[email protected]>

use crate::address::{Address, VirtAddr};
use crate::cpu::control_regs::{read_cr4, write_cr4, CR4Flags};

use core::arch::asm;

const INVLPGB_VALID_VA: u64 = 1u64 << 0;
Expand Down Expand Up @@ -50,6 +52,21 @@ pub fn flush_tlb_global_sync() {
do_tlbsync();
}

/// Flushes the TLB — including global entries — on the current CPU only.
/// Unlike `flush_tlb_global_sync()`, no flush is broadcast to other CPUs.
pub fn flush_tlb_global_percpu() {
    let cr4 = read_cr4();
    // Toggling CR4.PGE flushes all TLB entries on this CPU, including
    // global ones. The XOR clears PGE (cr4_init() asserts PGE support and
    // enables it, so the bit is expected to be set), and the second write
    // restores the original CR4 value. NOTE(review): if PGE were somehow
    // clear here, the XOR would briefly set it instead — both PGE
    // transitions flush the TLB, so the flush still occurs.
    write_cr4(cr4 ^ CR4Flags::PGE);
    write_cr4(cr4);
}

/// Flushes the TLB entry for the page containing `va` on the current CPU
/// only, using INVLPG. No flush is broadcast to other CPUs.
pub fn flush_address_percpu(va: VirtAddr) {
    // INVLPG takes a linear address; align down to the page base first.
    let va: u64 = va.page_align().bits() as u64;
    // SAFETY: INVLPG only invalidates a TLB entry for the given address; it
    // performs no memory access and cannot violate memory safety for any
    // value of `va`.
    unsafe {
        asm!("invlpg (%rax)",
             in("rax") va,
             options(att_syntax));
    }
}

pub fn flush_address(va: VirtAddr) {
let rax: u64 = (va.page_align().bits() as u64)
| INVLPGB_VALID_VA
Expand Down
9 changes: 1 addition & 8 deletions kernel/src/mm/pagetable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

use crate::address::{Address, PhysAddr, VirtAddr};
use crate::cpu::control_regs::write_cr3;
use crate::cpu::features::{cpu_has_nx, cpu_has_pge};
use crate::cpu::flush_tlb_global_sync;
use crate::error::SvsmError;
use crate::locking::{LockGuard, SpinLock};
Expand Down Expand Up @@ -57,13 +56,7 @@ pub fn paging_init_early(platform: &dyn SvsmPlatform, vtom: u64) -> ImmutAfterIn
/// Completes paging initialization once the platform encryption mask is
/// known.
///
/// NX and PGE support are asserted during early CPU setup (`efer_init()` /
/// `cr4_init()`), so the full set of page-table entry feature flags is
/// always available and no per-feature masking is required here.
///
/// # Errors
///
/// Returns an error if the encryption mask or the feature mask cannot be
/// (re)initialized.
pub fn paging_init(platform: &dyn SvsmPlatform, vtom: u64) -> ImmutAfterInitResult<()> {
    init_encrypt_mask(platform, vtom.try_into().unwrap())?;

    let feature_mask = PTEntryFlags::all();
    FEATURE_MASK.reinit(&feature_mask)
}

Expand Down
14 changes: 10 additions & 4 deletions kernel/src/mm/ptguards.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
use super::pagetable::PTEntryFlags;
use crate::address::{Address, PhysAddr, VirtAddr};
use crate::cpu::percpu::this_cpu;
use crate::cpu::tlb::flush_address_sync;
use crate::cpu::tlb::flush_address_percpu;
use crate::error::SvsmError;
use crate::mm::virtualrange::{
virt_alloc_range_2m, virt_alloc_range_4k, virt_free_range_2m, virt_free_range_4k,
};
use crate::types::{PAGE_SIZE, PAGE_SIZE_2M};
use crate::types::{PageSize, PAGE_SIZE, PAGE_SIZE_2M};

use crate::utils::MemoryRegion;

Expand Down Expand Up @@ -99,13 +99,19 @@ impl PerCPUPageMappingGuard {

impl Drop for PerCPUPageMappingGuard {
fn drop(&mut self) {
if self.huge {
let size = if self.huge {
this_cpu().get_pgtable().unmap_region_2m(self.mapping);
virt_free_range_2m(self.mapping);
PageSize::Huge
} else {
this_cpu().get_pgtable().unmap_region_4k(self.mapping);
virt_free_range_4k(self.mapping);
PageSize::Regular
};
// This iterative flush is acceptable for same-CPU mappings because no
// broadcast is involved for each iteration.
for page in self.mapping.iter_pages(size) {
flush_address_percpu(page);
}
flush_address_sync(self.mapping.start());
}
}
19 changes: 17 additions & 2 deletions kernel/src/mm/vm/range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
// Author: Joerg Roedel <[email protected]>

use crate::address::{Address, VirtAddr};
use crate::cpu::flush_tlb_global_sync;
use crate::cpu::{flush_tlb_global_percpu, flush_tlb_global_sync};
use crate::error::SvsmError;
use crate::locking::RWLock;
use crate::mm::pagetable::{PTEntryFlags, PageTable, PageTablePart, PageTableRef};
Expand Down Expand Up @@ -59,6 +59,10 @@ pub struct VMR {
/// [`PTEntryFlags`] global to all mappings in this region. This is a
/// combination of [`PTEntryFlags::GLOBAL`] and [`PTEntryFlags::USER`].
pt_flags: PTEntryFlags,

/// Indicates that this [`struct VMR`] is visible only on a single CPU
/// and therefore TLB flushes do not require broadcast.
per_cpu: bool,
}

impl VMR {
Expand All @@ -82,9 +86,16 @@ impl VMR {
tree: RWLock::new(RBTree::new(VMMAdapter::new())),
pgtbl_parts: RWLock::new(Vec::new()),
pt_flags: flags,
per_cpu: false,
}
}

/// Marks a [`struct VMR`] as being associated with only a single CPU
/// so that TLB flushes do not require broadcast.
///
/// # Arguments
///
/// * `per_cpu` - `true` if this region is visible to a single CPU only,
///   allowing TLB flushes to skip the cross-CPU broadcast; `false` if
///   flushes must be broadcast to all CPUs (the default for a new VMR).
pub fn set_per_cpu(&mut self, per_cpu: bool) {
    self.per_cpu = per_cpu;
}

/// Allocates all [`PageTablePart`]s needed to map this region
///
/// # Returns
Expand Down Expand Up @@ -425,7 +436,11 @@ impl VMR {
let mut cursor = tree.find_mut(&addr);
if let Some(node) = cursor.get() {
self.unmap_vmm(node);
flush_tlb_global_sync();
if self.per_cpu {
flush_tlb_global_percpu();
} else {
flush_tlb_global_sync();
}
}
cursor.remove().ok_or(SvsmError::Mem)
}
Expand Down

0 comments on commit fcb76e5

Please sign in to comment.