Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PregSet machine env #177

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 18 additions & 10 deletions src/fuzzing/func.rs
Original file line number Diff line number Diff line change
Expand Up @@ -645,16 +645,24 @@ pub fn machine_env() -> MachineEnv {
fn regs(r: core::ops::Range<usize>, c: RegClass) -> Vec<PReg> {
r.map(|i| PReg::new(i, c)).collect()
}
let preferred_regs_by_class: [Vec<PReg>; 3] = [
regs(0..24, RegClass::Int),
regs(0..24, RegClass::Float),
regs(0..24, RegClass::Vector),
];
let non_preferred_regs_by_class: [Vec<PReg>; 3] = [
regs(24..32, RegClass::Int),
regs(24..32, RegClass::Float),
regs(24..32, RegClass::Vector),
];
let int_regs: PRegSet = regs(0..24, RegClass::Int).into();
let float_regs: PRegSet = regs(0..24, RegClass::Float).into();
let vector_regs: PRegSet = regs(0..24, RegClass::Vector).into();

let mut preferred_regs_by_class = PRegSet::default();
preferred_regs_by_class.union_from(int_regs);
preferred_regs_by_class.union_from(float_regs);
preferred_regs_by_class.union_from(vector_regs);

let int_regs: PRegSet = regs(24..32, RegClass::Int).into();
let float_regs: PRegSet = regs(24..32, RegClass::Float).into();
let vector_regs: PRegSet = regs(24..32, RegClass::Vector).into();

let mut non_preferred_regs_by_class = PRegSet::default();
non_preferred_regs_by_class.union_from(int_regs);
non_preferred_regs_by_class.union_from(float_regs);
non_preferred_regs_by_class.union_from(vector_regs);

let scratch_by_class: [Option<PReg>; 3] = [None, None, None];
let fixed_stack_slots = (32..63)
.flat_map(|i| {
Expand Down
10 changes: 6 additions & 4 deletions src/ion/liveranges.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,12 @@ impl<'a, F: Function> Env<'a, F> {
self.pregs[preg.index()].is_stack = true;
}
for class in 0..self.preferred_victim_by_class.len() {
self.preferred_victim_by_class[class] = self.env.non_preferred_regs_by_class[class]
.last()
.or(self.env.preferred_regs_by_class[class].last())
.cloned()
self.preferred_victim_by_class[class] = self
.env
.non_preferred_regs_by_class
.last_in_class(class)
.or(self.env.preferred_regs_by_class.last_in_class(class))
// .cloned()
.unwrap_or(PReg::invalid());
}
// Create VRegs from the vreg count.
Expand Down
14 changes: 11 additions & 3 deletions src/ion/process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1221,9 +1221,17 @@ impl<'a, F: Function> Env<'a, F> {
let mut min_bundles_assigned = 0;
let mut fixed_assigned = 0;
let mut total_regs = 0;
for preg in self.env.preferred_regs_by_class[class as u8 as usize]
.iter()
.chain(self.env.non_preferred_regs_by_class[class as u8 as usize].iter())
for preg in self
.env
.preferred_regs_by_class
.to_preg_class(class as u8 as usize)
.into_iter()
.chain(
self.env
.non_preferred_regs_by_class
.to_preg_class(class as u8 as usize)
.into_iter(),
)
{
trace!(" -> PR {:?}", preg);
let start = LiveRangeKey::from_range(&CodeRange {
Expand Down
164 changes: 98 additions & 66 deletions src/ion/reg_traversal.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use crate::{MachineEnv, PReg, RegClass};

use crate::{find_nth, MachineEnv, PReg, RegClass};
/// This iterator represents a traversal through all allocatable
/// registers of a given class, in a certain order designed to
/// minimize allocation contention.
Expand All @@ -14,109 +13,142 @@ use crate::{MachineEnv, PReg, RegClass};
/// usage, these consist of caller-save and callee-save registers
/// respectively, to minimize clobber-saves; but they need not.)

pub struct RegTraversalIter<'a> {
env: &'a MachineEnv,
class: usize,
hints: [Option<PReg>; 2],
hint_idx: usize,
pref_idx: usize,
non_pref_idx: usize,
offset_pref: usize,
offset_non_pref: usize,
pub struct RegTraversalIter {
pref_regs_first: u64,
pref_regs_second: u64,
non_pref_regs_first: u64,
non_pref_regs_second: u64,
hint_regs: u64,
is_fixed: bool,
fixed: Option<PReg>,
class_mask: u8,
Comment on lines +21 to 23
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the fixed-register case is mutually exclusive with all the other cases, we could put that register in the hint_regs, ensure all the other sets are 0, and drop the is_fixed and fixed fields.

}

impl<'a> RegTraversalIter<'a> {
impl RegTraversalIter {
pub fn new(
env: &'a MachineEnv,
env: &MachineEnv,
class: RegClass,
hint_reg: PReg,
hint2_reg: PReg,
offset: usize,
fixed: Option<PReg>,
) -> Self {
let mut hint_reg = if hint_reg != PReg::invalid() {
Some(hint_reg)
} else {
None
};
let mut hint2_reg = if hint2_reg != PReg::invalid() {
Some(hint2_reg)
} else {
None
};
// get a mask for the hint registers
let mut hint_mask = 0u64;

if hint_reg.is_none() {
hint_reg = hint2_reg;
hint2_reg = None;
if hint_reg != PReg::invalid() {
let mask = 1u64 << (hint_reg.bits & 0b0011_1111);
hint_mask |= mask;
}
let hints = [hint_reg, hint2_reg];

if hint2_reg != PReg::invalid() {
let mask = 1u64 << (hint2_reg.bits & 0b0011_1111);
hint_mask |= mask;
}

Comment on lines -50 to +47
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like this so much better than having two separate Option<PReg> for the hint registers!

I was going to be a little concerned that this could change the order that the hint registers are returned in. However it turns out that at most one hint register is ever used at a time, and that's been true since regalloc2 0.0.1 in 2021. If you want to delete hint2_reg I think that would be worth doing, but it doesn't have to be in this PR.

I would like two changes here though:

  1. debug_assert_eq!(hint_reg.class(), reg_class); (and same for hint2_reg if you don't delete it)
  2. use hint_reg.hw_enc() to get the shift amount instead of accessing bits directly. (I considered suggesting a debug-assert that this is less than 64, but that is automatic, part of https://github.com/rust-lang/rfcs/blob/master/text/0560-integer-overflow.md.)

let class = class as u8 as usize;
let offset_pref = if env.preferred_regs_by_class[class].len() > 0 {
offset % env.preferred_regs_by_class[class].len()

let pref_regs_by_class = env.preferred_regs_by_class.bits[class];
let non_pref_regs_by_class = env.non_preferred_regs_by_class.bits[class];

let n_pref_regs = pref_regs_by_class.count_ones() as usize;
let n_non_pref_regs = non_pref_regs_by_class.count_ones() as usize;

let offset_pref = if n_pref_regs > 0 {
offset % n_pref_regs
} else {
0
};
let offset_non_pref = if env.non_preferred_regs_by_class[class].len() > 0 {
offset % env.non_preferred_regs_by_class[class].len()
let offset_non_pref = if n_non_pref_regs > 0 {
offset % n_non_pref_regs
} else {
0
};

// we want to split the pref registers bit vectors into two sets
// with the offset lowest bits in one and the rest in the other
let split_num = (n_pref_regs - offset_pref) as u64;
let split_pos = find_nth(pref_regs_by_class, split_num);
let mask = (1 << split_pos) - 1;
let pref_regs_first = pref_regs_by_class & !mask;
let pref_regs_second = pref_regs_by_class & mask;

let split_num = (n_non_pref_regs - offset_non_pref) as u64;
let split_pos = find_nth(non_pref_regs_by_class, split_num);
let mask = (1 << split_pos) - 1;
let non_pref_regs_first = non_pref_regs_by_class & !mask;
let non_pref_regs_second = non_pref_regs_by_class & mask;

// remove the hint registers from the bit vectors
let pref_regs_first = pref_regs_first & !hint_mask;
let pref_regs_second = pref_regs_second & !hint_mask;
let non_pref_regs_first = non_pref_regs_first & !hint_mask;
let non_pref_regs_second = non_pref_regs_second & !hint_mask;

Comment on lines +82 to +86
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you remove the hint registers from pref_regs_by_class and non_pref_regs_by_class above, before splitting the sets, then you would only have to mask twice instead of four times.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I'd done it this way in order to be as close to the original split as possible: I may try they way Chris suggested, that doesn't depend on doing a bit trick to find the split point instead as it does seem 'simplest'

let class_mask = (class as u8) << 6;

Self {
env,
class,
hints,
hint_idx: 0,
pref_idx: 0,
non_pref_idx: 0,
offset_pref,
offset_non_pref,
pref_regs_first,
pref_regs_second,
non_pref_regs_first,
non_pref_regs_second,
hint_regs: hint_mask,
is_fixed: fixed.is_some(),
fixed,
class_mask,
}
}
}

impl<'a> core::iter::Iterator for RegTraversalIter<'a> {
impl core::iter::Iterator for RegTraversalIter {
type Item = PReg;

fn next(&mut self) -> Option<PReg> {
// only take the fixed register if it exists
if self.is_fixed {
let ret = self.fixed;
self.fixed = None;
return ret;
}

fn wrap(idx: usize, limit: usize) -> usize {
if idx >= limit {
idx - limit
} else {
idx
}
// if there are hints, return them first
if self.hint_regs != 0 {
let index = self.hint_regs.trailing_zeros() as u8;
self.hint_regs &= !(1u64 << index);
let reg_index = index as u8 | self.class_mask;
return Some(PReg::from(reg_index));
}
Comment on lines +118 to +119
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would rather not introduce a From-conversion from u8 to PReg. I would go ahead and store the enum RegClass and use the PReg::new constructor, I think. Conceptually I like the micro-optimization of only doing the left-shift once in RegTraversalIter::new(), but it only saves one instruction and makes the code harder to read and reason about, so I don't think it's worth it.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmmm I'd actually done this for the opposite reason: I found it easier to reason/think about what was occurring when I could think of it as a 2 bit number for the class and 6 bit number for the position in the class, though that also requires fully committing to that representation, as opposed to mentally going through the associated constants and casts between types (I'm not actually sure how much leeway there is to change the bitpacked representation as doing it this way definitely would concrete it as it is)


// iterate over the preferred register rotated by offset
// iterate over first half
if self.pref_regs_first != 0 {
let index = self.pref_regs_first.trailing_zeros() as u8;
self.pref_regs_first &= !(1u64 << index);
let reg_index = index as u8 | self.class_mask;
return Some(PReg::from(reg_index));
}
if self.hint_idx < 2 && self.hints[self.hint_idx].is_some() {
let h = self.hints[self.hint_idx];
self.hint_idx += 1;
return h;
// iterate over second half
if self.pref_regs_second != 0 {
let index = self.pref_regs_second.trailing_zeros() as u8;
self.pref_regs_second &= !(1u64 << index);
let reg_index = index as u8 | self.class_mask;
return Some(PReg::from(reg_index));
}
while self.pref_idx < self.env.preferred_regs_by_class[self.class].len() {
let arr = &self.env.preferred_regs_by_class[self.class][..];
let r = arr[wrap(self.pref_idx + self.offset_pref, arr.len())];
self.pref_idx += 1;
if Some(r) == self.hints[0] || Some(r) == self.hints[1] {
continue;
}
return Some(r);

// iterate over the nonpreferred register rotated by offset
// iterate over first half
if self.non_pref_regs_first != 0 {
let index = self.non_pref_regs_first.trailing_zeros() as u8;
self.non_pref_regs_first &= !(1u64 << index);
let reg_index = index as u8 | self.class_mask;
return Some(PReg::from(reg_index));
}
while self.non_pref_idx < self.env.non_preferred_regs_by_class[self.class].len() {
let arr = &self.env.non_preferred_regs_by_class[self.class][..];
let r = arr[wrap(self.non_pref_idx + self.offset_non_pref, arr.len())];
self.non_pref_idx += 1;
if Some(r) == self.hints[0] || Some(r) == self.hints[1] {
continue;
}
return Some(r);
// iterate over second half
if self.non_pref_regs_second != 0 {
let index = self.non_pref_regs_second.trailing_zeros() as u8;
self.non_pref_regs_second &= !(1u64 << index);
let reg_index = index as u8 | self.class_mask;
return Some(PReg::from(reg_index));
}
Comment on lines +115 to +151
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because all five of these if statements do the same thing, I would suggest defining an array of [u64; 5] in struct RegTraversalIter, and looping over its elements here.

If you do that, there are additional tricks you could use, but they may not be faster at this scale, so I think it would be good to stop there.

None
}
Expand Down
Loading
Loading