Skip to content

Commit

Permalink
low hanging fruit in optimization
Browse files Browse the repository at this point in the history
  • Loading branch information
fulmicoton committed Jun 6, 2024
1 parent 8151925 commit 705b50f
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 20 deletions.
40 changes: 30 additions & 10 deletions src/query/phrase_prefix_query/phrase_prefix_scorer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::docset::{DocSet, TERMINATED};
use crate::fieldnorm::FieldNormReader;
use crate::postings::Postings;
use crate::query::bm25::Bm25Weight;
use crate::query::phrase_query::{intersection_count, PhraseScorer};
use crate::query::phrase_query::{intersection_count, intersection_exists, PhraseScorer};
use crate::query::Scorer;
use crate::{DocId, Score};

Expand Down Expand Up @@ -92,22 +92,25 @@ impl<TPostings: Postings> Scorer for PhraseKind<TPostings> {
}
}

pub struct PhrasePrefixScorer<TPostings: Postings> {
pub struct PhrasePrefixScorer<TPostings: Postings, const SCORING_ENABLED: bool> {
phrase_scorer: PhraseKind<TPostings>,
suffixes: Vec<TPostings>,
suffix_offset: u32,
phrase_count: u32,
suffix_position_buffer: Vec<u32>,
}

impl<TPostings: Postings> PhrasePrefixScorer<TPostings> {
impl<TPostings: Postings, const SCORING_ENABLED: bool>
PhrasePrefixScorer<TPostings, SCORING_ENABLED>
{
// If similarity_weight is None, then scoring is disabled.
pub fn new(
mut term_postings: Vec<(usize, TPostings)>,
similarity_weight_opt: Option<Bm25Weight>,
fieldnorm_reader: FieldNormReader,
suffixes: Vec<TPostings>,
suffix_pos: usize,
) -> PhrasePrefixScorer<TPostings> {
) -> PhrasePrefixScorer<TPostings, SCORING_ENABLED> {
// correct indices so we can merge with our suffix term the PhraseScorer doesn't know about
let max_offset = term_postings
.iter()
Expand Down Expand Up @@ -140,6 +143,7 @@ impl<TPostings: Postings> PhrasePrefixScorer<TPostings> {
suffixes,
suffix_offset: (max_offset - suffix_pos) as u32,
phrase_count: 0,
suffix_position_buffer: Vec::with_capacity(100),
};
if phrase_prefix_scorer.doc() != TERMINATED && !phrase_prefix_scorer.matches_prefix() {
phrase_prefix_scorer.advance();
Expand All @@ -153,7 +157,6 @@ impl<TPostings: Postings> PhrasePrefixScorer<TPostings> {

fn matches_prefix(&mut self) -> bool {
let mut count = 0;
let mut positions = Vec::new();
let current_doc = self.doc();
let pos_matching = self.phrase_scorer.get_intersection();
for suffix in &mut self.suffixes {
Expand All @@ -162,16 +165,27 @@ impl<TPostings: Postings> PhrasePrefixScorer<TPostings> {
}
let doc = suffix.seek(current_doc);
if doc == current_doc {
suffix.positions_with_offset(self.suffix_offset, &mut positions);
count += intersection_count(pos_matching, &positions);
suffix.positions_with_offset(self.suffix_offset, &mut self.suffix_position_buffer);
if SCORING_ENABLED {
count += intersection_count(pos_matching, &self.suffix_position_buffer);
} else {
if intersection_exists(pos_matching, &self.suffix_position_buffer) {
return true;
}
}

Check warning on line 175 in src/query/phrase_prefix_query/phrase_prefix_scorer.rs

View workflow job for this annotation

GitHub Actions / clippy

this `else { if .. }` block can be collapsed

warning: this `else { if .. }` block can be collapsed --> src/query/phrase_prefix_query/phrase_prefix_scorer.rs:171:24 | 171 | } else { | ________________________^ 172 | | if intersection_exists(pos_matching, &self.suffix_position_buffer) { 173 | | return true; 174 | | } 175 | | } | |_________________^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#collapsible_else_if = note: `#[warn(clippy::collapsible_else_if)]` on by default help: collapse nested if block | 171 ~ } else if intersection_exists(pos_matching, &self.suffix_position_buffer) { 172 + return true; 173 + } |
}
}
if !SCORING_ENABLED {
return false;
}
self.phrase_count = count as u32;
count != 0
}
}

impl<TPostings: Postings> DocSet for PhrasePrefixScorer<TPostings> {
impl<TPostings: Postings, const SCORING_ENABLED: bool> DocSet
for PhrasePrefixScorer<TPostings, SCORING_ENABLED>
{
fn advance(&mut self) -> DocId {
loop {
let doc = self.phrase_scorer.advance();
Expand All @@ -198,9 +212,15 @@ impl<TPostings: Postings> DocSet for PhrasePrefixScorer<TPostings> {
}
}

impl<TPostings: Postings> Scorer for PhrasePrefixScorer<TPostings> {
impl<TPostings: Postings, const SCORING_ENABLED: bool> Scorer
for PhrasePrefixScorer<TPostings, SCORING_ENABLED>
{
fn score(&mut self) -> Score {
if SCORING_ENABLED {
self.phrase_scorer.score()
} else {
1.0f32
}
// TODO modify score??
self.phrase_scorer.score()
}
}
53 changes: 45 additions & 8 deletions src/query/phrase_prefix_query/phrase_prefix_weight.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,11 @@ impl PhrasePrefixWeight {
Ok(FieldNormReader::constant(reader.max_doc(), 1))
}

pub(crate) fn phrase_scorer(
pub(crate) fn phrase_prefix_scorer<const SCORING_ENABLED: bool>(
&self,
reader: &SegmentReader,
boost: Score,
) -> crate::Result<Option<PhrasePrefixScorer<SegmentPostings>>> {
) -> crate::Result<Option<PhrasePrefixScorer<SegmentPostings, SCORING_ENABLED>>> {
let similarity_weight_opt = self
.similarity_weight_opt
.as_ref()
Expand Down Expand Up @@ -128,15 +128,20 @@ impl PhrasePrefixWeight {

impl Weight for PhrasePrefixWeight {
fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
if let Some(scorer) = self.phrase_scorer(reader, boost)? {
Ok(Box::new(scorer))
if self.similarity_weight_opt.is_some() {
if let Some(scorer) = self.phrase_prefix_scorer::<true>(reader, boost)? {
return Ok(Box::new(scorer));
}
} else {
Ok(Box::new(EmptyScorer))
if let Some(scorer) = self.phrase_prefix_scorer::<false>(reader, boost)? {
return Ok(Box::new(scorer));
}
}

Check warning on line 139 in src/query/phrase_prefix_query/phrase_prefix_weight.rs

View workflow job for this annotation

GitHub Actions / clippy

this `else { if .. }` block can be collapsed

warning: this `else { if .. }` block can be collapsed --> src/query/phrase_prefix_query/phrase_prefix_weight.rs:135:16 | 135 | } else { | ________________^ 136 | | if let Some(scorer) = self.phrase_prefix_scorer::<false>(reader, boost)? { 137 | | return Ok(Box::new(scorer)); 138 | | } 139 | | } | |_________^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#collapsible_else_if help: collapse nested if block | 135 ~ } else if let Some(scorer) = self.phrase_prefix_scorer::<false>(reader, boost)? { 136 + return Ok(Box::new(scorer)); 137 + } |
Ok(Box::new(EmptyScorer))
}

fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result<Explanation> {
let scorer_opt = self.phrase_scorer(reader, 1.0)?;
let scorer_opt = self.phrase_prefix_scorer::<true>(reader, 1.0)?;
if scorer_opt.is_none() {
return Err(does_not_match(doc));
}
Expand Down Expand Up @@ -200,7 +205,7 @@ mod tests {
.unwrap()
.unwrap();
let mut phrase_scorer = phrase_weight
.phrase_scorer(searcher.segment_reader(0u32), 1.0)?
.phrase_prefix_scorer::<true>(searcher.segment_reader(0u32), 1.0)?
.unwrap();
assert_eq!(phrase_scorer.doc(), 1);
assert_eq!(phrase_scorer.phrase_count(), 2);
Expand All @@ -211,6 +216,38 @@ mod tests {
Ok(())
}

#[test]
pub fn test_phrase_no_count() -> crate::Result<()> {
let index = create_index(&[
"aa bb dd cc",
"aa aa bb c dd aa bb cc aa bb dc",
" aa bb cd",
])?;
let schema = index.schema();
let text_field = schema.get_field("text").unwrap();
let searcher = index.reader()?.searcher();
let phrase_query = PhrasePrefixQuery::new(vec![
Term::from_field_text(text_field, "aa"),
Term::from_field_text(text_field, "bb"),
Term::from_field_text(text_field, "c"),
]);
let enable_scoring = EnableScoring::enabled_from_searcher(&searcher);
let phrase_weight = phrase_query
.phrase_prefix_query_weight(enable_scoring)
.unwrap()
.unwrap();
let mut phrase_scorer = phrase_weight
.phrase_prefix_scorer::<false>(searcher.segment_reader(0u32), 1.0)?
.unwrap();
assert_eq!(phrase_scorer.doc(), 1);
assert_eq!(phrase_scorer.phrase_count(), 1);
assert_eq!(phrase_scorer.advance(), 2);
assert_eq!(phrase_scorer.doc(), 2);
assert_eq!(phrase_scorer.phrase_count(), 1);
assert_eq!(phrase_scorer.advance(), TERMINATED);
Ok(())
}

#[test]
pub fn test_phrase_count_mid() -> crate::Result<()> {
let index = create_index(&["aa dd cc", "aa aa bb c dd aa bb cc aa dc", " aa bb cd"])?;
Expand All @@ -227,7 +264,7 @@ mod tests {
.unwrap()
.unwrap();
let mut phrase_scorer = phrase_weight
.phrase_scorer(searcher.segment_reader(0u32), 1.0)?
.phrase_prefix_scorer::<true>(searcher.segment_reader(0u32), 1.0)?
.unwrap();
assert_eq!(phrase_scorer.doc(), 1);
assert_eq!(phrase_scorer.phrase_count(), 2);
Expand Down
2 changes: 1 addition & 1 deletion src/query/phrase_query/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ mod phrase_scorer;
mod phrase_weight;

pub use self::phrase_query::PhraseQuery;
pub(crate) use self::phrase_scorer::intersection_count;
pub use self::phrase_scorer::PhraseScorer;
pub(crate) use self::phrase_scorer::{intersection_count, intersection_exists};
pub use self::phrase_weight::PhraseWeight;

#[cfg(test)]
Expand Down
2 changes: 1 addition & 1 deletion src/query/phrase_query/phrase_scorer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ pub struct PhraseScorer<TPostings: Postings> {
}

/// Returns true if and only if the two sorted arrays contain a common element
fn intersection_exists(left: &[u32], right: &[u32]) -> bool {
pub(crate) fn intersection_exists(left: &[u32], right: &[u32]) -> bool {
let mut left_index = 0;
let mut right_index = 0;
while left_index < left.len() && right_index < right.len() {
Expand Down

0 comments on commit 705b50f

Please sign in to comment.