BenchCouncil · cococo2000 · Oct 21, 2024 · Oct 21, 2024
diff --git a/bigvectorbench/algorithms/base/module.py b/bigvectorbench/algorithms/base/module.py
@@ -3,6 +3,7 @@
 from multiprocessing.pool import ThreadPool
 from typing import Any, Dict, Optional
 import psutil
+import docker
 
 import numpy as np
 
@@ -11,6 +12,17 @@ class BaseANN(object):
     """
     Base class/interface for Approximate Nearest Neighbors (ANN) algorithms used in benchmarking.
     """
+    name = "BaseANN"
+    _num_entities = 0
+
+    def __init__(self):
+        """
+        Set up the Docker client used for container-based memory accounting.
+
+        Tries to build a Docker client from the environment and stores the
+        containers that the client lists at construction time. If any step
+        of that setup raises, ``docker_client`` is set to None and
+        ``containers`` to an empty list; ``get_memory_usage`` checks for a
+        None client and then reports the memory of the current process.
+        """
+        try:
+            self.docker_client = docker.from_env()
+            # The container list is captured once, here.
+            # NOTE(review): containers started after this call would not be
+            # in the list -- confirm subclasses start their containers before
+            # calling super().__init__().
+            self.containers = self.docker_client.containers.list()
+        except Exception:
+            # Broad catch: any failure above (not only a missing Docker
+            # daemon) takes this fallback path and prints the message below.
+            print("Test environment is not dockerized")
+            self.docker_client = None
+            self.containers = []
 
     @property
     def num_entities(self) -> int:
@@ -34,7 +46,13 @@ def get_memory_usage(self) -> Optional[float]:
             float: The current memory usage in kilobytes (for backwards compatibility), or None if
                 this information is not available.
         """
-        return psutil.Process().memory_info().rss / 1024
+        if self.docker_client is None:
+            return psutil.Process().memory_info().rss / 1024
+        else:
+            sum_memory = 0
+            for container in self.containers:
+                sum_memory += container.stats(stream=False)["memory_stats"]["usage"] / 1024
+            return sum_memory
 
     def load_data(
         self,
@@ -52,6 +70,7 @@ def load_data(
             label_names (list[str]): label names
             label_types (list[str]): label types
         """
+        raise NotImplementedError
 
     def create_index(self) -> None:
         """

diff --git a/bigvectorbench/algorithms/bruteforce/module.py b/bigvectorbench/algorithms/bruteforce/module.py
@@ -6,6 +6,7 @@
 
 
 class BruteForce(BaseANN):
+    """kNN search that uses a linear scan = brute force."""
     def __init__(self, metric):
         if metric not in ("angular", "euclidean", "hamming"):
             raise NotImplementedError("BruteForce doesn't support metric %s" % metric)
@@ -21,6 +22,7 @@ def __init__(self, metric):
         self.labels = None
         self.label_names = None
         self.label_types = None
+        super().__init__()
 
     def load_data(
         self,

diff --git a/bigvectorbench/algorithms/dummy_algo/module.py b/bigvectorbench/algorithms/dummy_algo/module.py
@@ -11,6 +11,7 @@ class DummyAlgo(BaseANN):
     def __init__(self, metric):
         self.name = "DummyAlgo"
         self.len = 0
+        super().__init__()
 
     def load_data(
         self,

diff --git a/bigvectorbench/algorithms/elasticsearch/module.py b/bigvectorbench/algorithms/elasticsearch/module.py
@@ -59,6 +59,7 @@ def __init__(self, metric: str, dim: int):
         self.batch_search_queries = []
         self.batch_results = []
         self.batch_latencies = []
+        super().__init__()
 
     def start_container(self) -> None:
         """
@@ -86,7 +87,7 @@ def stop_container(self) -> None:
             )
             print("[Elasticsearch] docker compose down successfully!!!")
         except subprocess.CalledProcessError as e:
-            print(f"[Vearch] docker compose down failed: {e}!!!")
+            print(f"[Elasticsearch] docker compose down failed: {e}!!!")
 
     def get_vector_index(self):
         """Get vector index"""
@@ -288,7 +289,7 @@ def query(self, v, n, filter_expr=None):
     #         filter=self.query_filter,
     #         limit=self.query_topk,
     #     )
-    #     # print(f"[Vearch] query result: {ret.__dict__}")
+    #     # print(f"[Elasticsearch] query result: {ret.__dict__}")
     #     self.prepare_query_results = [point["id"] for point in ret.documents[0]]
 
     # def get_prepared_query_results(self) -> list[int]:
@@ -369,7 +370,7 @@ def insert(self, embeddings: np.ndarray, labels: np.ndarray | None = None) -> No
     #     Returns:
     #         None
     #     """
-    #     # print(f"[Vearch] delete index: {index}")
+    #     # print(f"[Elasticsearch] delete index: {index}")
     #     self.client.delete(
     #         database_name=self._database_name,
     #         space_name=f"{self._database_name}_space",

diff --git a/bigvectorbench/algorithms/milvus/module.py b/bigvectorbench/algorithms/milvus/module.py
@@ -69,6 +69,7 @@ def __init__(self, metric: str, dim: int):
         self.batch_results = []
         self.batch_latencies = []
         self.requests = []
+        super().__init__()
 
     def start_milvus(self) -> None:
         """

diff --git a/bigvectorbench/algorithms/qdrant/module.py b/bigvectorbench/algorithms/qdrant/module.py
@@ -62,6 +62,7 @@ def __init__(self, metric: str, index_param: dict):
         self.batch_search_queries = []
         self.batch_results = []
         self.batch_latencies = []
+        super().__init__()
 
     def start_container(self) -> None:
         """

diff --git a/bigvectorbench/algorithms/redis/module.py b/bigvectorbench/algorithms/redis/module.py
@@ -52,6 +52,7 @@ def __init__(self, metric: str, dim: int):
         self.search_params = None
         self.query = None
         self.prepare_query_results = None
+        super().__init__()
 
     def start_container(self) -> None:
         """

diff --git a/bigvectorbench/algorithms/vearch/module.py b/bigvectorbench/algorithms/vearch/module.py
@@ -71,6 +71,7 @@ def __init__(self, metric: str, dim: int):
         self.batch_search_queries = []
         self.batch_results = []
         self.batch_latencies = []
+        super().__init__()
 
     def start_container(self) -> None:
         """

diff --git a/bigvectorbench/algorithms/weaviate/module.py b/bigvectorbench/algorithms/weaviate/module.py
@@ -74,6 +74,7 @@ def __init__(self, metric: str):
         self.batch_query_filters = None
         self.batch_results = []
         self.batch_latencies = []
+        super().__init__()
 
     def start_weaviate(self) -> None:
         """