Skip to content

Commit

Permalink
Fix file reading memoization (#8)
Browse files Browse the repository at this point in the history
* fix items_after_test_module lint

* rm memoization

* better python ex documentation

---------

Co-authored-by: noahbaculi <[email protected]>
  • Loading branch information
noahbaculi and noahbaculi authored Oct 31, 2023
1 parent 54e72da commit 8875334
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 86 deletions.
3 changes: 1 addition & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "busca"
version = "2.2.1"
version = "2.3.1"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
Expand All @@ -21,7 +21,6 @@ console = "0.15.5"
glob = "0.3.1"
indicatif = { version = "0.17.3", features = ["rayon"] }
inquire = "0.6.1"
memoize = "0.4.0"
pyo3 = { version = "0.19.2", features = ["extension-module"] }
rayon = "1.7.0"
similar = { version = "2.2.1", features = ["inline"] }
Expand Down
Empty file added python/__init__.py
Empty file.
6 changes: 3 additions & 3 deletions python/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,20 @@ def example_usage():
reference_string = file.read()

# Perform search with required parameters
all_file_matches = busca.search_for_lines(
all_file_matches: list[busca.FileMatch] = busca.search_for_lines(
reference_string=reference_string,
search_path="./sample_dir_hello_world",
)

# File matches are returned in descending order of percent match
closest_file_match = all_file_matches[0]
closest_file_match: busca.FileMatch = all_file_matches[0]
assert closest_file_match.path == reference_file_path
assert closest_file_match.percent_match == 1.0
assert closest_file_match.lines == reference_string

# Perform search for top 5 matches with additional filters
# to speed up runtime by skipping files that will not match
relevant_file_matches = busca.search_for_lines(
relevant_file_matches: list[busca.FileMatch] = busca.search_for_lines(
reference_string=reference_string,
search_path="./sample_dir_hello_world",
max_lines=10_000,
Expand Down
54 changes: 26 additions & 28 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use glob::Pattern;
use memoize::memoize;
use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
Expand Down Expand Up @@ -327,7 +326,6 @@ pub fn compare_file(comp_path: PathBuf, args: &Args, ref_lines: &str) -> Option<
})
}

#[memoize]
fn read_file(comp_path: PathBuf) -> Option<String> {
let comp_reader = fs::read_to_string(comp_path);
let comp_lines = match comp_reader {
Expand All @@ -340,6 +338,32 @@ fn read_file(comp_path: PathBuf) -> Option<String> {
Some(comp_lines)
}

/// Returns the line-level similarity ratio between `ref_lines` and `comp_lines`.
///
/// Both inputs are diffed line by line and the diff's `ratio()` is returned.
/// As the examples below show, the value is
/// `2 * matching_lines / (ref_line_count + comp_line_count)`, so `1.0` means
/// the two inputs are identical.
///
/// # Examples
///
/// ```
/// //                ✓   ✓  x   ✓   x   = 3 matches -> 2 * 3 / (5 + 9) = 3 / 7
/// let ref_lines = "12\n14\n5\n17\n19\n";
/// let comp_lines = "11\n12\n13\n14\n15\n16\n\n17\n18\n";
/// let result = busca::get_percent_matching_lines(ref_lines, comp_lines);
/// assert_eq!(result, 3.0 / 7.0);
/// ```
/// ---
/// ```
/// // The final `17` has no trailing newline, so it is not counted as a match.
/// //                ✓   ✓  x   x   = 2 matches -> 2 * 2 / (4 + 9) = 4 / 13
/// let ref_lines = "12\n14\n5\n17";
/// let comp_lines = "11\n12\n13\n14\n15\n16\n\n17\n18\n";
/// let result = busca::get_percent_matching_lines(ref_lines, comp_lines);
/// assert_eq!(result, 4.0 / 13.0);
/// ```
///
pub fn get_percent_matching_lines(ref_lines: &str, comp_lines: &str) -> f32 {
    TextDiff::from_lines(ref_lines, comp_lines).ratio()
}

#[cfg(test)]
mod test_compare_file {
use super::*;
Expand Down Expand Up @@ -471,29 +495,3 @@ mod test_compare_file {
assert_eq!(file_comparison, None);
}
}

/// Returns the line-level similarity ratio between `ref_lines` and `comp_lines`.
///
/// Both inputs are diffed line by line and the diff's `ratio()` is returned.
/// As the examples below show, the value is
/// `2 * matching_lines / (ref_line_count + comp_line_count)`, so `1.0` means
/// the two inputs are identical.
///
/// # Examples
///
/// ```
/// //                ✓   ✓  x   ✓   x   = 3 matches -> 2 * 3 / (5 + 9) = 3 / 7
/// let ref_lines = "12\n14\n5\n17\n19\n";
/// let comp_lines = "11\n12\n13\n14\n15\n16\n\n17\n18\n";
/// let result = busca::get_percent_matching_lines(ref_lines, comp_lines);
/// assert_eq!(result, 3.0 / 7.0);
/// ```
/// ---
/// ```
/// // The final `17` has no trailing newline, so it is not counted as a match.
/// //                ✓   ✓  x   x   = 2 matches -> 2 * 2 / (4 + 9) = 4 / 13
/// let ref_lines = "12\n14\n5\n17";
/// let comp_lines = "11\n12\n13\n14\n15\n16\n\n17\n18\n";
/// let result = busca::get_percent_matching_lines(ref_lines, comp_lines);
/// assert_eq!(result, 4.0 / 13.0);
/// ```
///
pub fn get_percent_matching_lines(ref_lines: &str, comp_lines: &str) -> f32 {
    TextDiff::from_lines(ref_lines, comp_lines).ratio()
}
106 changes: 53 additions & 53 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,59 @@ fn cli_run_search(args: &Args) -> Result<Vec<FileMatch>, String> {
Ok(file_match_vec)
}

/// Prints a colorized, line-numbered diff of `ref_lines` against `comp_lines` to stdout.
///
/// Changes are clustered with `grouped_ops(3)`. Every cluster after the first is
/// preceded by an 80-character dashed rule. When there are no clusters at all,
/// a short "identical" notice is printed instead.
fn output_detailed_diff(ref_lines: &str, comp_lines: &str) {
    let text_diff = TextDiff::from_lines(ref_lines, comp_lines);
    let hunks = text_diff.grouped_ops(3);

    // Nothing to show when the diff produced no change clusters.
    if hunks.is_empty() {
        println!("The sequences are identical.");
        return;
    }

    let mut is_first_hunk = true;
    for hunk in &hunks {
        // Separate consecutive hunks with a horizontal rule.
        if !is_first_hunk {
            println!("{:-^1$}", "-", 80);
        }
        is_first_hunk = false;

        for operation in hunk {
            for change in text_diff.iter_inline_changes(operation) {
                // The gutter marker and the colour both depend on the kind of change.
                let (marker, palette) = match change.tag() {
                    ChangeTag::Equal => (" ", Style::new().dim()),
                    ChangeTag::Insert => ("+", Style::new().green()),
                    ChangeTag::Delete => ("-", Style::new().red()),
                };

                // Gutter: old line number, new line number, change marker.
                let old_number = style(Line(change.old_index())).dim();
                let new_number = style(Line(change.new_index())).dim();
                print!(
                    "{} {} {} |",
                    old_number,
                    new_number,
                    palette.apply_to(marker).bold(),
                );

                // Emphasized fragments are the parts that changed within the line.
                for (emphasized, fragment) in change.iter_strings_lossy() {
                    let rendered = palette.apply_to(fragment);
                    if emphasized {
                        print!("{}", rendered.underlined().on_black());
                    } else {
                        print!("{}", rendered);
                    }
                }

                // Terminate the output line ourselves when the change reports a missing newline.
                if change.missing_newline() {
                    println!();
                }
            }
        }
    }
}

/// A line index for the diff gutter; `None` means the line has no index on that side.
struct Line(Option<usize>);

impl fmt::Display for Line {
    /// Writes the line number as a left-aligned, four-character-wide column.
    ///
    /// `Some(idx)` is shown as `idx + 1`, converting the zero-based index into a
    /// one-based line number. `None` is shown as blank padding of the same width,
    /// so the gutter columns stay aligned for inserted and deleted lines.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.0 {
            // Pad an empty string to the same width used for real line numbers.
            None => write!(f, "{:<4}", ""),
            Some(idx) => write!(f, "{:<4}", idx + 1),
        }
    }
}

#[cfg(test)]
mod test_cli_run_search {
use super::*;
Expand Down Expand Up @@ -388,56 +441,3 @@ mod test_cli_run_search {
assert_eq!(cli_run_search(&valid_args).unwrap(), expected);
}
}

/// Prints a colorized, line-numbered diff of `ref_lines` against `comp_lines` to stdout.
///
/// Changes are clustered with `grouped_ops(3)`. Every cluster after the first is
/// preceded by an 80-character dashed rule. When there are no clusters at all,
/// a short "identical" notice is printed instead.
fn output_detailed_diff(ref_lines: &str, comp_lines: &str) {
    let text_diff = TextDiff::from_lines(ref_lines, comp_lines);
    let hunks = text_diff.grouped_ops(3);

    // Nothing to show when the diff produced no change clusters.
    if hunks.is_empty() {
        println!("The sequences are identical.");
        return;
    }

    let mut is_first_hunk = true;
    for hunk in &hunks {
        // Separate consecutive hunks with a horizontal rule.
        if !is_first_hunk {
            println!("{:-^1$}", "-", 80);
        }
        is_first_hunk = false;

        for operation in hunk {
            for change in text_diff.iter_inline_changes(operation) {
                // The gutter marker and the colour both depend on the kind of change.
                let (marker, palette) = match change.tag() {
                    ChangeTag::Equal => (" ", Style::new().dim()),
                    ChangeTag::Insert => ("+", Style::new().green()),
                    ChangeTag::Delete => ("-", Style::new().red()),
                };

                // Gutter: old line number, new line number, change marker.
                let old_number = style(Line(change.old_index())).dim();
                let new_number = style(Line(change.new_index())).dim();
                print!(
                    "{} {} {} |",
                    old_number,
                    new_number,
                    palette.apply_to(marker).bold(),
                );

                // Emphasized fragments are the parts that changed within the line.
                for (emphasized, fragment) in change.iter_strings_lossy() {
                    let rendered = palette.apply_to(fragment);
                    if emphasized {
                        print!("{}", rendered.underlined().on_black());
                    } else {
                        print!("{}", rendered);
                    }
                }

                // Terminate the output line ourselves when the change reports a missing newline.
                if change.missing_newline() {
                    println!();
                }
            }
        }
    }
}

/// A line index for the diff gutter; `None` means the line has no index on that side.
struct Line(Option<usize>);

impl fmt::Display for Line {
    /// Writes the line number as a left-aligned, four-character-wide column.
    ///
    /// `Some(idx)` is shown as `idx + 1`, converting the zero-based index into a
    /// one-based line number. `None` is shown as blank padding of the same width,
    /// so the gutter columns stay aligned for inserted and deleted lines.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.0 {
            // Pad an empty string to the same width used for real line numbers.
            None => write!(f, "{:<4}", ""),
            Some(idx) => write!(f, "{:<4}", idx + 1),
        }
    }
}

0 comments on commit 8875334

Please sign in to comment.