diff --git a/src/qdrant_haystack/document_stores/qdrant.py b/src/qdrant_haystack/document_stores/qdrant.py index 920f205..8f78974 100644 --- a/src/qdrant_haystack/document_stores/qdrant.py +++ b/src/qdrant_haystack/document_stores/qdrant.py @@ -74,6 +74,7 @@ def __init__( init_from: Optional[dict] = None, wait_result_from_api: bool = True, metadata: Optional[dict] = None, + batch_size: int = 10_000, ): super().__init__() @@ -108,6 +109,7 @@ def __init__( # Make sure the collection is properly set up self._set_up_collection(index, embedding_dim, recreate_index, similarity) + self.batch_size = batch_size self.embedding_dim = embedding_dim self.content_field = content_field self.name_field = name_field @@ -130,9 +132,10 @@ def get_all_documents( index: Optional[str] = None, filters: Optional[FilterType] = None, return_embedding: Optional[bool] = None, - batch_size: int = 10_000, + batch_size: Optional[int] = None, headers: Optional[Dict[str, str]] = None, ) -> List[Document]: + batch_size = batch_size or self.batch_size return list( self.get_all_documents_generator( index, filters, return_embedding, batch_size, headers @@ -144,9 +147,10 @@ def get_all_documents_generator( index: Optional[str] = None, filters: Optional[FilterType] = None, return_embedding: Optional[bool] = None, - batch_size: int = 10_000, + batch_size: Optional[int] = None, headers: Optional[Dict[str, str]] = None, ) -> Generator[Document, None, None]: + batch_size = batch_size or self.batch_size index = index or self.index qdrant_filters = self.qdrant_filter_converter.convert(filters) @@ -185,11 +189,11 @@ def get_documents_by_id( self, ids: List[str], index: Optional[str] = None, - batch_size: int = 10_000, + batch_size: Optional[int] = None, headers: Optional[Dict[str, str]] = None, ) -> List[Document]: index = index or self.index - + batch_size = batch_size or self.batch_size documents: List[Document] = [] next_offset = None @@ -279,11 +283,12 @@ def write_documents( self, documents: Union[List[dict], List[Document]], index: Optional[str] = None, - batch_size: int = 10_000, + batch_size: Optional[int] = None, duplicate_documents: Optional[str] = None, headers: Optional[Dict[str, str]] = None, ): index = index or self.index + batch_size = batch_size or self.batch_size self._set_up_collection(index, self.embedding_dim, False, self.similarity) field_map = self._create_document_field_map()