Merge pull request #5 from oramasearch/feat/quality_check
chore: adds quality checks
micheleriva authored Nov 26, 2024
2 parents e226462 + 93e6b8b commit 69439dd
Showing 7 changed files with 359 additions and 1 deletion.
4 changes: 3 additions & 1 deletion .gitignore
@@ -1,2 +1,4 @@
.idea
target
target
benchmark_results.csv
benchmark_results.png
92 changes: 92 additions & 0 deletions Cargo.lock

Some generated files (Cargo.lock) are not rendered by default.

6 changes: 6 additions & 0 deletions Cargo.toml
@@ -20,6 +20,8 @@ rand_distr = "0.4.3"
rayon = "1.10.0"
log = "0.4.22"
thiserror = "2.0.3"
env_logger = "0.11.5"
serde = { version = "1.0.215", features = ["derive"] }

[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }
@@ -35,3 +37,7 @@ path = "src/bin/example.rs"
[[bin]]
name = "readme_example"
path = "src/bin/readme_example.rs"

[[bin]]
name = "quality_check"
path = "src/bin/quality_check.rs"
14 changes: 14 additions & 0 deletions Makefile
@@ -0,0 +1,14 @@
.PHONY: quality_check clean

RUST_LOG := info
PLOTS_DIR := plots
BENCHMARK_RESULTS := benchmark_results.png

quality_check:
RUST_LOG=$(RUST_LOG) cargo run --release --bin quality_check
cd $(PLOTS_DIR) && python3 main.py
mv $(PLOTS_DIR)/$(BENCHMARK_RESULTS) ./$(BENCHMARK_RESULTS)

clean:
rm -f $(BENCHMARK_RESULTS)
rm -f benchmark_results.csv
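
Taken together, the make quality_check target runs the Rust binary from the repository root (so benchmark_results.csv lands there), then runs the plotting script from inside plots/ where it reads ../benchmark_results.csv, and finally moves the generated benchmark_results.png back to the root; make clean removes both generated files, which are also covered by the new .gitignore entries.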
52 changes: 52 additions & 0 deletions plots/main.py
@@ -0,0 +1,52 @@
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the benchmark results
df = pd.read_csv('../benchmark_results.csv')

# Set up the plotting style
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# Create a figure with multiple subplots
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))

# Plot 1: Timing metrics
ax1.plot(df['n_samples'], df['fit_time_ms'], marker='o', label='Fit Time')
ax1.plot(df['n_samples'], df['compression_time_ms'], marker='o', label='Compression Time')
ax1.set_xlabel('Number of Samples')
ax1.set_ylabel('Time (ms)')
ax1.set_title('Processing Time vs Dataset Size')
ax1.legend()
ax1.set_xscale('log')
ax1.set_yscale('log')

# Plot 2: Quality metrics
ax2.plot(df['n_samples'], df['reconstruction_error'], marker='o', label='Reconstruction Error')
ax2.plot(df['n_samples'], df['recall'], marker='o', label='Recall@10')
ax2.set_xlabel('Number of Samples')
ax2.set_ylabel('Score')
ax2.set_title('Quality Metrics vs Dataset Size')
ax2.legend()
ax2.set_xscale('log')

# Plot 3: Memory reduction
ax3.plot(df['n_samples'], (1 - df['memory_reduction_ratio']) * 100, marker='o')
ax3.set_xlabel('Number of Samples')
ax3.set_ylabel('Memory Reduction (%)')
ax3.set_title('Memory Reduction vs Dataset Size')
ax3.set_xscale('log')

# Plot 4: Time per sample
df['time_per_sample'] = (df['compression_time_ms']) / df['n_samples']
ax4.plot(df['n_samples'], df['time_per_sample'], marker='o')
ax4.set_xlabel('Number of Samples')
ax4.set_ylabel('Compression Time per Sample (ms)')
ax4.set_title('Scaling Efficiency')
ax4.set_xscale('log')
ax4.set_yscale('log')

plt.tight_layout()
plt.savefig('benchmark_results.png', dpi=300, bbox_inches='tight')
plt.close()
3 changes: 3 additions & 0 deletions plots/requirements.txt
@@ -0,0 +1,3 @@
pandas
matplotlib
seaborn