Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Naming & docs/doctests #4

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 51 additions & 49 deletions src/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ use std::{

use crate::{
codecs::{NativeU32, ZeroCopyCodec},
error::{DbError, GetDocumentError, SearchError},
error::{DatabaseError, DocumentError, SearchError},
normalize,
roaringish::{Aligned, ArchivedBorrowRoaringishPacked, RoaringishPackedKind, Unaligned},
stats::Stats,
Expand Down Expand Up @@ -233,19 +233,19 @@ impl<D> Document for D where
{
}

pub struct DB<D: Document> {
pub struct Db<D: Document> {
pub env: Env,
db_main: Database<Unspecified, Unspecified>,
db_doc_id_to_document: Database<NativeU32, ZeroCopyCodec<D>>,
db_token_to_offsets: Database<Str, ZeroCopyCodec<Offset>>,
}

unsafe impl<D: Document> Send for DB<D> {}
unsafe impl<D: Document> Send for Db<D> {}

unsafe impl<D: Document> Sync for DB<D> {}
unsafe impl<D: Document> Sync for Db<D> {}

impl<D: Document> DB<D> {
pub fn truncate<P: AsRef<Path>>(path: P, db_size: usize) -> Result<Self, DbError> {
impl<D: Document> Db<D> {
pub fn truncate<P: AsRef<Path>>(path: P, db_size: usize) -> Result<Self, DatabaseError> {
let path = path.as_ref();
let _ = std::fs::remove_dir_all(path);
std::fs::create_dir_all(path)?;
Expand Down Expand Up @@ -287,7 +287,7 @@ impl<D: Document> DB<D> {
rwtxn: &mut RwTxn,
doc_ids: &[u32],
documents: &[D],
) -> Result<(), DbError> {
) -> Result<(), DatabaseError> {
log::debug!("Writing documents");
let b = std::time::Instant::now();
for (doc_id, document) in doc_ids.iter().zip(documents.iter()) {
Expand All @@ -304,7 +304,7 @@ impl<D: Document> DB<D> {
token_id_to_roaringish_packed: &[RoaringishPacked],
mmap_size: &mut usize,
batch_id: u32,
) -> Result<(), DbError> {
) -> Result<(), DatabaseError> {
log::debug!("Writing token to roaringish packed");
let b = std::time::Instant::now();
let mut token_to_packed: Vec<_> = token_to_token_id
Expand Down Expand Up @@ -337,7 +337,7 @@ impl<D: Document> DB<D> {
mmap_size: usize,
number_of_batches: u32,
rwtxn: &mut RwTxn,
) -> Result<(), DbError> {
) -> Result<(), DatabaseError> {
#[inline(always)]
unsafe fn write_to_mmap<const N: usize>(
mmap: &mut MmapMut,
Expand Down Expand Up @@ -374,14 +374,14 @@ impl<D: Document> DB<D> {

// we need to do this in 3 steps because of the borrow checker
let files_mmaps = (0..number_of_batches)
.map(|i| -> Result<Mmap, DbError> {
.map(|i| -> Result<Mmap, DatabaseError> {
let file_name = format!("{}_{i}", db_constants::TEMP_FILE_TOKEN_TO_PACKED);
let file = File::options()
.read(true)
.open(self.env.path().join(file_name))?;
unsafe { Ok(Mmap::map(&file)?) }
})
.collect::<Result<Vec<_>, DbError>>()?;
.collect::<Result<Vec<_>, DatabaseError>>()?;
let files_data: Vec<_> = files_mmaps
.iter()
.map(|mmap| unsafe {
Expand Down Expand Up @@ -497,12 +497,12 @@ impl<D: Document> DB<D> {
fn read_common_tokens(
rotxn: &RoTxn,
db_main: Database<Unspecified, Unspecified>,
) -> Result<HashSet<Box<str>>, DbError> {
) -> Result<HashSet<Box<str>>, DatabaseError> {
let k = db_main
.remap_types::<Str, ZeroCopyCodec<HashSet<Box<str>>>>()
.get(rotxn, db_constants::KEY_COMMON_TOKENS)?
.ok_or_else(|| {
DbError::KeyNotFound(
DatabaseError::KeyNotFound(
db_constants::KEY_COMMON_TOKENS.to_string(),
"main".to_string(),
)
Expand All @@ -515,7 +515,7 @@ impl<D: Document> DB<D> {
&self,
rwtxn: &mut RwTxn,
common_tokens: &HashSet<Box<str>>,
) -> Result<(), DbError> {
) -> Result<(), DatabaseError> {
log::debug!("Writing common tokens");
let b = std::time::Instant::now();
self.db_main
Expand All @@ -525,7 +525,7 @@ impl<D: Document> DB<D> {
Ok(())
}

pub fn open<P: AsRef<Path>>(path: P) -> Result<(Self, HashSet<Box<str>>, Mmap), DbError> {
pub fn open<P: AsRef<Path>>(path: P) -> Result<(Self, HashSet<Box<str>>, Mmap), DatabaseError> {
let path = path.as_ref();
let env = unsafe {
EnvOpenOptions::new()
Expand All @@ -538,7 +538,7 @@ impl<D: Document> DB<D> {

let db_main = env
.open_database(&rotxn, None)?
.ok_or_else(|| DbError::DatabaseError("main".to_string()))?;
.ok_or_else(|| DatabaseError::DatabaseError("main".to_string()))?;

let db_doc_id_to_document = env
.database_options()
Expand All @@ -547,12 +547,14 @@ impl<D: Document> DB<D> {
.name(db_constants::DB_DOC_ID_TO_DOCUMENT)
.open(&rotxn)?
.ok_or_else(|| {
DbError::DatabaseError(db_constants::DB_DOC_ID_TO_DOCUMENT.to_string())
DatabaseError::DatabaseError(db_constants::DB_DOC_ID_TO_DOCUMENT.to_string())
})?;

let db_token_to_offsets = env
.open_database(&rotxn, Some(db_constants::DB_TOKEN_TO_OFFSETS))?
.ok_or_else(|| DbError::DatabaseError(db_constants::DB_TOKEN_TO_OFFSETS.to_string()))?;
.ok_or_else(|| {
DatabaseError::DatabaseError(db_constants::DB_TOKEN_TO_OFFSETS.to_string())
})?;

let common_tokens = Self::read_common_tokens(&rotxn, db_main)?;

Expand Down Expand Up @@ -593,7 +595,7 @@ impl<D: Document> DB<D> {
> {
#[inline(always)]
fn check_before_recursion<'a, 'b, 'alloc, D: Document>(
me: &DB<D>,
me: &Db<D>,
rotxn: &RoTxn,
tokens: RefTokens<'a>,
token_to_packed: &mut GxHashMap<RefTokens<'a>, BorrowRoaringishPacked<'b, Aligned>>,
Expand All @@ -611,7 +613,7 @@ impl<D: Document> DB<D> {
let score = match token_to_packed.entry(tokens) {
Entry::Occupied(e) => e.get().len(),
Entry::Vacant(e) => {
let packed = me.get_roaringish_packed(rotxn, &tokens[0], mmap)?;
let packed = me.roaringish_packed(rotxn, &tokens[0], mmap)?;
let score = packed.len();
e.insert(packed);

Expand All @@ -625,7 +627,7 @@ impl<D: Document> DB<D> {

#[allow(clippy::too_many_arguments)]
fn inner_merge_and_minimize_tokens<'a, 'b, 'c, 'alloc, D: Document>(
me: &DB<D>,
me: &Db<D>,
rotxn: &RoTxn,
tokens: RefTokens<'a>,
common_tokens: &HashSet<Box<str>>,
Expand Down Expand Up @@ -662,7 +664,7 @@ impl<D: Document> DB<D> {
let score = match token_to_packed.entry(tokens) {
Entry::Occupied(e) => e.get().len(),
Entry::Vacant(e) => {
let packed = me.get_roaringish_packed(rotxn, tokens.tokens(), mmap)?;
let packed = me.roaringish_packed(rotxn, tokens.tokens(), mmap)?;
let score = packed.len();
e.insert(packed);
score
Expand Down Expand Up @@ -731,7 +733,7 @@ impl<D: Document> DB<D> {
// function makes some queries performance unpredictable
#[inline(never)]
fn no_common_tokens<'a, 'b, 'alloc, D: Document>(
me: &DB<D>,
me: &Db<D>,
rotxn: &RoTxn,
tokens: RefTokens<'a>,
mmap: &'b Mmap,
Expand All @@ -747,7 +749,7 @@ impl<D: Document> DB<D> {
let mut v = Vec::with_capacity(l);

for token in tokens.ref_token_iter() {
let packed = me.get_roaringish_packed(rotxn, token.tokens(), mmap)?;
let packed = me.roaringish_packed(rotxn, token.tokens(), mmap)?;
token_to_packed.insert(token, packed);
v.push(token);
}
Expand Down Expand Up @@ -797,7 +799,7 @@ impl<D: Document> DB<D> {
}
}

fn get_roaringish_packed_from_offset<'a>(
fn roaringish_packed_from_offset<'a>(
offset: &ArchivedOffset,
mmap: &'a Mmap,
) -> Result<BorrowRoaringishPacked<'a, Aligned>, SearchError> {
Expand All @@ -813,13 +815,13 @@ impl<D: Document> DB<D> {
}

mmap.advise_range(memmap2::Advice::Sequential, begin, len)
.map_err(|e| DbError::from(e))?;
.map_err(|e| DatabaseError::from(e))?;

Ok(BorrowRoaringishPacked::new_raw(packed))
}

#[inline(always)]
pub fn get_roaringish_packed<'a>(
pub fn roaringish_packed<'a>(
&self,
rotxn: &RoTxn,
token: &str,
Expand All @@ -828,9 +830,9 @@ impl<D: Document> DB<D> {
let offset = self
.db_token_to_offsets
.get(rotxn, token)
.map_err(|e| DbError::from(e))?;
.map_err(|e| DatabaseError::from(e))?;
match offset {
Some(offset) => Self::get_roaringish_packed_from_offset(offset, mmap),
Some(offset) => Self::roaringish_packed_from_offset(offset, mmap),
None => Err(SearchError::TokenNotFound(token.to_string())),
}
}
Expand All @@ -855,11 +857,11 @@ impl<D: Document> DB<D> {
return Err(SearchError::EmptyQuery);
}

let rotxn = self.env.read_txn().map_err(|e| DbError::from(e))?;
let rotxn = self.env.read_txn().map_err(|e| DatabaseError::from(e))?;
if tokens.len() == 1 {
// this can't fail, we just checked
self.get_roaringish_packed(&rotxn, tokens.first().unwrap(), mmap)?
.get_doc_ids(stats);
self.roaringish_packed(&rotxn, tokens.first().unwrap(), mmap)?
.document_ids(stats);
}

let b = std::time::Instant::now();
Expand All @@ -878,7 +880,7 @@ impl<D: Document> DB<D> {
return token_to_packed
.get(&final_tokens[0])
.ok_or_else(|| SearchError::TokenNotFound(final_tokens[0].tokens().to_string()))
.map(|p| p.get_doc_ids(stats));
.map(|p| p.document_ids(stats));
}

// at this point we know that we have at least
Expand Down Expand Up @@ -982,26 +984,26 @@ impl<D: Document> DB<D> {
}
}

Ok(result_borrow.get_doc_ids(stats))
Ok(result_borrow.document_ids(stats))
}

fn inner_get_archived_document<'a>(
&self,
rotxn: &'a RoTxn,
doc_id: &u32,
) -> Result<&'a D::Archived, GetDocumentError> {
) -> Result<&'a D::Archived, DocumentError> {
self.db_doc_id_to_document
.get(rotxn, doc_id)
.map_err(|e| DbError::from(e))?
.ok_or(GetDocumentError::DocumentNotFound(*doc_id))
.map_err(|e| DatabaseError::from(e))?
.ok_or(DocumentError::DocumentNotFound(*doc_id))
}

pub fn get_archived_documents(
pub fn archived_documents(
&self,
doc_ids: &[u32],
cb: impl FnOnce(Vec<&D::Archived>),
) -> Result<(), GetDocumentError> {
let rotxn = self.env.read_txn().map_err(|e| DbError::from(e))?;
) -> Result<(), DocumentError> {
let rotxn = self.env.read_txn().map_err(|e| DatabaseError::from(e))?;
let docs = doc_ids
.into_iter()
.map(|doc_id| self.inner_get_archived_document(&rotxn, doc_id))
Expand All @@ -1012,41 +1014,41 @@ impl<D: Document> DB<D> {
Ok(())
}

pub fn get_archived_document(
pub fn archived_document(
&self,
doc_id: u32,
cb: impl FnOnce(&D::Archived),
) -> Result<(), GetDocumentError> {
let rotxn = self.env.read_txn().map_err(|e| DbError::from(e))?;
) -> Result<(), DocumentError> {
let rotxn = self.env.read_txn().map_err(|e| DatabaseError::from(e))?;
let doc = self.inner_get_archived_document(&rotxn, &doc_id)?;

cb(doc);

Ok(())
}

pub fn get_documents(&self, doc_ids: &[u32]) -> Result<Vec<D>, GetDocumentError>
pub fn documents(&self, doc_ids: &[u32]) -> Result<Vec<D>, DocumentError>
where
<D as Archive>::Archived: Deserialize<D, Strategy<Pool, rkyv::rancor::Error>>,
{
let rotxn = self.env.read_txn().map_err(|e| DbError::from(e))?;
let rotxn = self.env.read_txn().map_err(|e| DatabaseError::from(e))?;
doc_ids
.into_iter()
.map(|doc_id| {
let archived = self.inner_get_archived_document(&rotxn, doc_id)?;
rkyv::deserialize::<D, rkyv::rancor::Error>(archived)
.map_err(|e| GetDocumentError::DbError(DbError::from(e)))
.map_err(|e| DocumentError::DatabaseError(DatabaseError::from(e)))
})
.collect::<Result<Vec<_>, _>>()
}

pub fn get_document(&self, doc_id: u32) -> Result<D, GetDocumentError>
pub fn document(&self, doc_id: u32) -> Result<D, DocumentError>
where
<D as Archive>::Archived: Deserialize<D, Strategy<Pool, rkyv::rancor::Error>>,
{
let rotxn = self.env.read_txn().map_err(|e| DbError::from(e))?;
let rotxn = self.env.read_txn().map_err(|e| DatabaseError::from(e))?;
let archived = self.inner_get_archived_document(&rotxn, &doc_id)?;
rkyv::deserialize::<D, rkyv::rancor::Error>(archived)
.map_err(|e| GetDocumentError::DbError(DbError::from(e)))
.map_err(|e| DocumentError::DatabaseError(DatabaseError::from(e)))
}
}
8 changes: 4 additions & 4 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use thiserror::Error;

/// Possible errors that can occur while interacting with the database.
#[derive(Error, Debug)]
pub enum DbError {
pub enum DatabaseError {
#[error("Io error: {0}")]
IoError(#[from] std::io::Error),

Expand All @@ -23,7 +23,7 @@ pub enum DbError {
#[derive(Error, Debug)]
pub enum SearchError {
#[error("Db error: {0}")]
DbError(#[from] DbError),
DatabaseError(#[from] DatabaseError),

#[error("Searched query is empty")]
EmptyQuery,
Expand All @@ -43,9 +43,9 @@ pub enum SearchError {

/// Possible errors when trying to retrieve documents by their internal ID.
#[derive(Error, Debug)]
pub enum GetDocumentError {
pub enum DocumentError {
#[error("Db error: {0}")]
DbError(#[from] DbError),
DatabaseError(#[from] DatabaseError),

#[error("Document with id `{0}` not found")]
DocumentNotFound(u32),
Expand Down
Loading