cpu/tlb: do not broadcast per-cpu TLB flushes #417

Merged 2 commits on Jul 25, 2024
10 changes: 7 additions & 3 deletions kernel/src/cpu/control_regs.rs
@@ -24,14 +24,17 @@ pub fn cr4_init() {
 
     cr4.insert(CR4Flags::PSE); // Enable Page Size Extensions
 
-    if cpu_has_pge() {
-        cr4.insert(CR4Flags::PGE); // Enable Global Pages
-    }
+    // All processors that are capable of virtualization will support global
+    // page table entries, so there is no reason to support any processor that
+    // does not enumerate PGE capability.
+    assert!(cpu_has_pge(), "CPU does not support PGE");
 
+    cr4.insert(CR4Flags::PGE); // Enable Global Pages
     write_cr4(cr4);
 }
 
 bitflags! {
+    #[derive(Debug, Clone, Copy)]
     pub struct CR0Flags: u64 {
         const PE = 1 << 0; // Protection Enabled
         const MP = 1 << 1; // Monitor Coprocessor
@@ -106,6 +109,7 @@ pub fn write_cr3(cr3: PhysAddr) {
 }
 
 bitflags! {
+    #[derive(Debug, Clone, Copy)]
     pub struct CR4Flags: u64 {
         const VME = 1 << 0; // Virtual-8086 Mode Extensions
         const PVI = 1 << 1; // Protected-Mode Virtual Interrupts
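The assert relies on cpu_has_pge() from kernel/src/cpu/features.rs. For reference, PGE support is enumerated in CPUID leaf 1, EDX bit 13; a minimal sketch of such a check is shown below, assuming direct CPUID access (the real helper may go through a different abstraction, especially under SEV-SNP where CPUID results are validated against the guest CPUID page):

    // Sketch only: enumerate PGE via CPUID leaf 1, EDX bit 13.
    // The actual cpu_has_pge() in features.rs may be implemented differently.
    fn has_pge_via_cpuid() -> bool {
        // SAFETY: CPUID is available on all x86-64 processors.
        let edx = unsafe { core::arch::x86_64::__cpuid(1).edx };
        edx & (1 << 13) != 0
    }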
8 changes: 5 additions & 3 deletions kernel/src/cpu/efer.rs
@@ -36,9 +36,11 @@ pub fn write_efer(efer: EFERFlags) {
 pub fn efer_init() {
     let mut efer = read_efer();
 
-    if cpu_has_nx() {
-        efer.insert(EFERFlags::NXE);
-    }
+    // All processors that are capable of virtualization will support
+    // no-execute table entries, so there is no reason to support any processor
+    // that does not enumerate NX capability.
+    assert!(cpu_has_nx(), "CPU does not support NX");
 
+    efer.insert(EFERFlags::NXE);
     write_efer(efer);
 }
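NX enumeration is analogous: CPUID extended leaf 0x8000_0001, EDX bit 20. A sketch under the same caveats as the PGE check above:

    // Sketch only: enumerate NX via CPUID leaf 0x8000_0001, EDX bit 20.
    // The actual cpu_has_nx() in features.rs may be implemented differently.
    fn has_nx_via_cpuid() -> bool {
        // SAFETY: CPUID is available on all x86-64 processors.
        let edx = unsafe { core::arch::x86_64::__cpuid(0x8000_0001).edx };
        edx & (1 << 20) != 0
    }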
7 changes: 6 additions & 1 deletion kernel/src/cpu/percpu.rs
@@ -323,7 +323,12 @@ impl PerCpu {
             tss: Cell::new(X86Tss::new()),
             svsm_vmsa: OnceCell::new(),
             reset_ip: Cell::new(0xffff_fff0),
-            vm_range: VMR::new(SVSM_PERCPU_BASE, SVSM_PERCPU_END, PTEntryFlags::GLOBAL),
+            vm_range: {
+                let mut vmr = VMR::new(SVSM_PERCPU_BASE, SVSM_PERCPU_END, PTEntryFlags::GLOBAL);
+                vmr.set_per_cpu(true);
+                vmr
+            },
+
             vrange_4k: RefCell::new(VirtualRange::new()),
             vrange_2m: RefCell::new(VirtualRange::new()),
             runqueue: RefCell::new(RunQueue::new()),
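The block expression marks the per-CPU VMR at construction time without changing the VMR::new() signature. A builder-style method would read slightly tighter in the struct literal; a hypothetical alternative (with_per_cpu is not part of this PR), sketched for comparison only:

    // Hypothetical consuming builder; assumes access to the private per_cpu field.
    impl VMR {
        pub fn with_per_cpu(mut self, per_cpu: bool) -> Self {
            self.per_cpu = per_cpu;
            self
        }
    }
    // vm_range: VMR::new(SVSM_PERCPU_BASE, SVSM_PERCPU_END, PTEntryFlags::GLOBAL)
    //     .with_per_cpu(true),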
17 changes: 17 additions & 0 deletions kernel/src/cpu/tlb.rs
@@ -5,6 +5,8 @@
 // Author: Joerg Roedel <[email protected]>
 
 use crate::address::{Address, VirtAddr};
+use crate::cpu::control_regs::{read_cr4, write_cr4, CR4Flags};
+
 use core::arch::asm;
 
 const INVLPGB_VALID_VA: u64 = 1u64 << 0;

@@ -50,6 +52,21 @@ pub fn flush_tlb_global_sync() {
     do_tlbsync();
 }
 
+pub fn flush_tlb_global_percpu() {
+    let cr4 = read_cr4();
+    write_cr4(cr4 ^ CR4Flags::PGE);
+    write_cr4(cr4);
+}
+
+pub fn flush_address_percpu(va: VirtAddr) {
+    let va: u64 = va.page_align().bits() as u64;
+    unsafe {
+        asm!("invlpg (%rax)",
+             in("rax") va,
+             options(att_syntax));
+    }
+}
+
 pub fn flush_address(va: VirtAddr) {
     let rax: u64 = (va.page_align().bits() as u64)
         | INVLPGB_VALID_VA
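Both new primitives act only on the executing CPU. flush_tlb_global_percpu() toggles CR4.PGE: a write to CR4 that changes the PGE bit architecturally invalidates all TLB entries, including global ones, so clearing and then restoring the bit performs a full local flush. flush_address_percpu() issues a plain invlpg, which invalidates a single page's translations locally; by contrast, the existing flush_address() builds an INVLPGB broadcast. A hypothetical wrapper (not part of this PR) showing how a caller might choose between the two, with the threshold being an assumed tuning value:

    // Sketch only: flush a few pages individually, or fall back to a full
    // local flush when per-page invalidation would be more expensive.
    const FULL_FLUSH_THRESHOLD: usize = 32; // assumed, not measured

    fn flush_pages_percpu(pages: &[VirtAddr]) {
        if pages.len() > FULL_FLUSH_THRESHOLD {
            flush_tlb_global_percpu();
        } else {
            for &va in pages {
                flush_address_percpu(va);
            }
        }
    }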
9 changes: 1 addition & 8 deletions kernel/src/mm/pagetable.rs
@@ -6,7 +6,6 @@
 
 use crate::address::{Address, PhysAddr, VirtAddr};
 use crate::cpu::control_regs::write_cr3;
-use crate::cpu::features::{cpu_has_nx, cpu_has_pge};
 use crate::cpu::flush_tlb_global_sync;
 use crate::error::SvsmError;
 use crate::locking::{LockGuard, SpinLock};

@@ -57,13 +56,7 @@ pub fn paging_init_early(platform: &dyn SvsmPlatform, vtom: u64) -> ImmutAfterInitResult<()> {
 pub fn paging_init(platform: &dyn SvsmPlatform, vtom: u64) -> ImmutAfterInitResult<()> {
     init_encrypt_mask(platform, vtom.try_into().unwrap())?;
 
-    let mut feature_mask = PTEntryFlags::all();
-    if !cpu_has_nx() {
-        feature_mask.remove(PTEntryFlags::NX);
-    }
-    if !cpu_has_pge() {
-        feature_mask.remove(PTEntryFlags::GLOBAL);
-    }
+    let feature_mask = PTEntryFlags::all();
     FEATURE_MASK.reinit(&feature_mask)
 }
14 changes: 10 additions & 4 deletions kernel/src/mm/ptguards.rs
@@ -7,12 +7,12 @@
 use super::pagetable::PTEntryFlags;
 use crate::address::{Address, PhysAddr, VirtAddr};
 use crate::cpu::percpu::this_cpu;
-use crate::cpu::tlb::flush_address_sync;
+use crate::cpu::tlb::flush_address_percpu;
 use crate::error::SvsmError;
 use crate::mm::virtualrange::{
     virt_alloc_range_2m, virt_alloc_range_4k, virt_free_range_2m, virt_free_range_4k,
 };
-use crate::types::{PAGE_SIZE, PAGE_SIZE_2M};
+use crate::types::{PageSize, PAGE_SIZE, PAGE_SIZE_2M};
 
 use crate::utils::MemoryRegion;
 

@@ -99,13 +99,19 @@ impl PerCPUPageMappingGuard {
 
 impl Drop for PerCPUPageMappingGuard {
     fn drop(&mut self) {
-        if self.huge {
+        let size = if self.huge {
             this_cpu().get_pgtable().unmap_region_2m(self.mapping);
             virt_free_range_2m(self.mapping);
+            PageSize::Huge
         } else {
             this_cpu().get_pgtable().unmap_region_4k(self.mapping);
             virt_free_range_4k(self.mapping);
-        }
-        flush_address_sync(self.mapping.start());
+            PageSize::Regular
+        };
+        // This iterative flush is acceptable for same-CPU mappings because no
+        // broadcast is involved for each iteration.
+        for page in self.mapping.iter_pages(size) {
+            flush_address_percpu(page);
+        }
     }
 }
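Dropping a PerCPUPageMappingGuard now flushes each page of the mapping with a local invlpg instead of broadcasting flush_address_sync() to all CPUs, which is correct because the mapping exists only in this CPU's page table. Typical usage is unchanged; a rough sketch (the create_4k() and virt_addr() names are assumed from the surrounding code):

    // Sketch of guard usage; exact constructor/accessor names are assumptions.
    let guard = PerCPUPageMappingGuard::create_4k(paddr)?;
    let vaddr: VirtAddr = guard.virt_addr();
    // ... access the mapped page through vaddr ...
    drop(guard); // unmaps and flushes only this CPU's TLB entries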
19 changes: 17 additions & 2 deletions kernel/src/mm/vm/range.rs
@@ -5,7 +5,7 @@
 // Author: Joerg Roedel <[email protected]>
 
 use crate::address::{Address, VirtAddr};
-use crate::cpu::flush_tlb_global_sync;
+use crate::cpu::{flush_tlb_global_percpu, flush_tlb_global_sync};
 use crate::error::SvsmError;
 use crate::locking::RWLock;
 use crate::mm::pagetable::{PTEntryFlags, PageTable, PageTablePart, PageTableRef};

@@ -59,6 +59,10 @@ pub struct VMR {
     /// [`PTEntryFlags`] global to all mappings in this region. This is a
     /// combination of [`PTEntryFlags::GLOBAL`] and [`PTEntryFlags::USER`].
     pt_flags: PTEntryFlags,
+
+    /// Indicates that this [`struct VMR`] is visible only on a single CPU
+    /// and therefore TLB flushes do not require broadcast.
+    per_cpu: bool,
 }
 
 impl VMR {

@@ -82,9 +86,16 @@ impl VMR {
             tree: RWLock::new(RBTree::new(VMMAdapter::new())),
             pgtbl_parts: RWLock::new(Vec::new()),
             pt_flags: flags,
+            per_cpu: false,
         }
     }
 
+    /// Marks a [`struct VMR`] as being associated with only a single CPU
+    /// so that TLB flushes do not require broadcast.
+    pub fn set_per_cpu(&mut self, per_cpu: bool) {
+        self.per_cpu = per_cpu;
+    }
+
     /// Allocated all [`PageTablePart`]s needed to map this region
     ///
     /// # Returns

@@ -425,7 +436,11 @@
         let mut cursor = tree.find_mut(&addr);
         if let Some(node) = cursor.get() {
             self.unmap_vmm(node);
-            flush_tlb_global_sync();
+            if self.per_cpu {
+                flush_tlb_global_percpu();
+            } else {
+                flush_tlb_global_sync();
+            }
         }
         cursor.remove().ok_or(SvsmError::Mem)
     }
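per_cpu defaults to false, so every existing VMR keeps the conservative broadcast flush; only regions explicitly marked via set_per_cpu(true) (currently just the per-CPU range built in PerCpu::new()) take the local path. The resulting choice, restated as a condensed sketch:

    // Condensed restatement of the dispatch added to VMR unmapping:
    fn flush_after_unmap(per_cpu: bool) {
        if per_cpu {
            flush_tlb_global_percpu(); // CR4.PGE toggle, local CPU only
        } else {
            flush_tlb_global_sync(); // INVLPGB + TLBSYNC, visible to all CPUs
        }
    }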