Skip to content

Commit

Permalink
Optimization for FlatNetworkFilterList.
Browse files Browse the repository at this point in the history
  • Loading branch information
boocmp committed Jan 24, 2025
1 parent ceeefb4 commit dd800d8
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 28 deletions.
34 changes: 16 additions & 18 deletions src/filters/fb_network.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ pub struct FlatNetworkFiltersListBuilder<'a> {
filters: Vec<WIPOffset<fb::NetworkFilter<'a>>>,

unique_domains: Vec<Hash>,
unique_domains_map: HashMap<Hash, u16>,
}

impl<'a> FlatNetworkFiltersListBuilder<'a> {
Expand All @@ -29,34 +30,31 @@ impl<'a> FlatNetworkFiltersListBuilder<'a> {
builder: flatbuffers::FlatBufferBuilder::new(),
filters: vec![],
unique_domains: vec![],
unique_domains_map: HashMap::new(),
}
}

fn get_or_insert(arr: &mut Vec<Hash>, h: Hash) -> u16 {
if let Some(index) = arr.iter().position(|&x| x == h) {
u16::try_from(index).expect("< u16 max")
} else {
arr.push(h);
u16::try_from(arr.len() - 1).expect("< u16 max")
fn get_or_insert(&mut self, h: &Hash) -> u16 {
if let Some(&index) = self.unique_domains_map.get(h) {
return index;
}
let index = self.unique_domains.len() as u16;
self.unique_domains.push(*h);
self.unique_domains_map.insert(*h, index);
return index;
}

pub fn add(&mut self, network_filter: &NetworkFilter) -> u32 {
let opt_domains = network_filter.opt_domains.as_ref().map(|v| {
let mut o: Vec<u16> = v
.into_iter()
.map(|x| Self::get_or_insert(&mut self.unique_domains, *x))
.collect();
let mut o: Vec<u16> = v.iter().map(|x| self.get_or_insert(x)).collect();

o.sort_unstable();
o.dedup();
self.builder.create_vector(&o)
});

let opt_not_domains = network_filter.opt_not_domains.as_ref().map(|v| {
let mut o: Vec<u16> = v
.into_iter()
.map(|x| Self::get_or_insert(&mut self.unique_domains, *x))
.collect();
let mut o: Vec<u16> = v.iter().map(|x| self.get_or_insert(x)).collect();
o.sort_unstable();
o.dedup();
self.builder.create_vector(&o)
Expand All @@ -65,23 +63,23 @@ impl<'a> FlatNetworkFiltersListBuilder<'a> {
let modifier_option = network_filter
.modifier_option
.as_ref()
.map(|s| self.builder.create_shared_string(&s));
.map(|s| self.builder.create_string(&s));

let hostname = network_filter
.hostname
.as_ref()
.map(|s| self.builder.create_shared_string(&s));
.map(|s| self.builder.create_string(&s));

let tag = network_filter
.tag
.as_ref()
.map(|s| self.builder.create_shared_string(&s));
.map(|s| self.builder.create_string(&s));

let patterns = if network_filter.filter.iter().len() > 0 {
let offsets: Vec<WIPOffset<&str>> = network_filter
.filter
.iter()
.map(|s| self.builder.create_shared_string(s))
.map(|s| self.builder.create_string(s))
.collect();
Some(self.builder.create_vector(&offsets))
} else {
Expand Down
54 changes: 49 additions & 5 deletions src/network_filter_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -267,15 +267,59 @@ pub struct FlatNetworkFilterList {

impl NetworkFilterListTrait for FlatNetworkFilterList {
fn new(filters: Vec<NetworkFilter>, optimize: bool) -> Self {
let mut temp_list = NetworkFilterList::new(filters, optimize);
// Compute tokens for all filters
let filter_tokens: Vec<_> = filters
.into_iter()
.map(|filter| {
let tokens = filter.get_tokens();
(filter, tokens)
})
.collect();
// compute the tokens' frequency histogram
let (total_number_of_tokens, tokens_histogram) = token_histogram(&filter_tokens);

let mut flat_builder = FlatNetworkFiltersListBuilder::new();
let mut filter_map = HashMap::<Hash, Vec<u32>>::new();
let mut optimizable = HashMap::<Hash, Vec<NetworkFilter>>::new();
{
for (network_filter, multi_tokens) in filter_tokens {
let index = if !optimizer::is_filter_optimizable_by_patterns(&network_filter) {
Some(flat_builder.add(&network_filter))
} else {
None
};

for tokens in multi_tokens {
let mut best_token: Hash = 0;
let mut min_count = total_number_of_tokens + 1;
for token in tokens {
match tokens_histogram.get(&token) {
None => {
min_count = 0;
best_token = token
}
Some(&count) if count < min_count => {
min_count = count;
best_token = token
}
_ => {}
}
}
if let Some(index) = index {
insert_dup(&mut filter_map, best_token, index);
} else {
insert_dup(&mut optimizable, best_token, network_filter.clone());
}
} // tokens
}
}

for (token, v) in optimizable {
let optimized = optimizer::optimize_by_groupping_patterns(v);

for (key, vec) in temp_list.filter_map.drain() {
for filter in vec.into_iter() {
let index = flat_builder.add(&(*filter));
insert_dup(&mut filter_map, key, index);
for filter in optimized {
let index = flat_builder.add(&filter);
insert_dup(&mut filter_map, token, index);
}
}

Expand Down
29 changes: 24 additions & 5 deletions src/optimizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,29 @@ trait Optimization {
fn select(&self, filter: &NetworkFilter) -> bool;
}

/// Tells whether `filter` may be fused with other filters by pattern grouping.
///
/// A filter qualifies only when it carries neither `opt_domains` nor
/// `opt_not_domains`, and is not a hostname-anchor, redirect or CSP filter.
pub fn is_filter_optimizable_by_patterns(filter: &NetworkFilter) -> bool {
    // Any domain option (included or excluded) rules the filter out.
    let has_domain_options = filter.opt_domains.is_some() || filter.opt_not_domains.is_some();
    if has_domain_options {
        return false;
    }

    // The remaining exclusions are checked in the same order as before.
    !(filter.is_hostname_anchor() || filter.is_redirect() || filter.is_csp())
}

/// Runs only the `SimplePatternGroup` optimization over `filters`.
///
/// The result holds the fused filters followed by every filter that was left
/// unfused, and is then ordered by filter `id`.
pub fn optimize_by_groupping_patterns(filters: Vec<NetworkFilter>) -> Vec<NetworkFilter> {
    let grouping = SimplePatternGroup {};
    let (mut result, leftover) = apply_optimisation(&grouping, filters);

    // Keep whatever could not be fused, placed after the fused filters.
    result.extend(leftover);

    // Fusing perturbs the original order, so sort by id again (stable sort).
    result.sort_by_key(|f| f.id);
    result
}

/// Fuse `NetworkFilter`s together by applying optimizations sequentially.
pub fn optimize(filters: Vec<NetworkFilter>) -> Vec<NetworkFilter> {
let mut optimized: Vec<NetworkFilter> = Vec::new();
Expand Down Expand Up @@ -129,11 +152,7 @@ impl Optimization for SimplePatternGroup {
format!("{:b}:{:?}", filter.mask, filter.is_complete_regex())
}
fn select(&self, filter: &NetworkFilter) -> bool {
filter.opt_domains.is_none()
&& filter.opt_not_domains.is_none()
&& !filter.is_hostname_anchor()
&& !filter.is_redirect()
&& !filter.is_csp()
is_filter_optimizable_by_patterns(filter)
}
}

Expand Down

0 comments on commit dd800d8

Please sign in to comment.