diff --git a/frigate/embeddings/__init__.py b/frigate/embeddings/__init__.py
index 381d95ed19..aa7590994f 100644
--- a/frigate/embeddings/__init__.py
+++ b/frigate/embeddings/__init__.py
@@ -73,7 +73,7 @@ class EmbeddingsContext:
     def __init__(self, db: SqliteVecQueueDatabase):
         self.embeddings = Embeddings(db)
         self.thumb_stats = ZScoreNormalization()
-        self.desc_stats = ZScoreNormalization()
+        self.desc_stats = ZScoreNormalization(scale_factor=2.5, bias=0.5)
 
         # load stats from disk
         try:
diff --git a/frigate/embeddings/functions/minilm_l6_v2.py b/frigate/embeddings/functions/minilm_l6_v2.py
index a3a8b45b3a..5245edcdc9 100644
--- a/frigate/embeddings/functions/minilm_l6_v2.py
+++ b/frigate/embeddings/functions/minilm_l6_v2.py
@@ -46,7 +46,7 @@ def _download_model(self, path: str):
         elif os.path.basename(path) == self.TOKENIZER_FILE:
             logger.info("Downloading MiniLM tokenizer")
             tokenizer = AutoTokenizer.from_pretrained(
-                self.MODEL_NAME, clean_up_tokenization_spaces=False
+                self.MODEL_NAME, clean_up_tokenization_spaces=True
             )
             tokenizer.save_pretrained(path)
 
@@ -78,7 +78,7 @@ def _load_model_and_tokenizer(self):
     def _load_tokenizer(self):
         tokenizer_path = os.path.join(self.DOWNLOAD_PATH, self.TOKENIZER_FILE)
         return AutoTokenizer.from_pretrained(
-            tokenizer_path, clean_up_tokenization_spaces=False
+            tokenizer_path, clean_up_tokenization_spaces=True
         )
 
     def _load_model(self, path: str, providers: List[str]):
diff --git a/frigate/embeddings/util.py b/frigate/embeddings/util.py
index 7550716c93..0b2acd4d67 100644
--- a/frigate/embeddings/util.py
+++ b/frigate/embeddings/util.py
@@ -4,12 +4,15 @@
 
 
 class ZScoreNormalization:
-    """Running Z-score normalization for search distance."""
-
-    def __init__(self):
+    def __init__(self, scale_factor: float = 1.0, bias: float = 0.0):
+        """Initialize with optional scaling and bias adjustments."""
+        """scale_factor adjusts the magnitude of each score"""
+        """bias will artificially shift the entire distribution upwards"""
         self.n = 0
         self.mean = 0
         self.m2 = 0
+        self.scale_factor = scale_factor
+        self.bias = bias
 
     @property
     def variance(self):
@@ -23,7 +26,10 @@ def normalize(self, distances: list[float]):
         self._update(distances)
         if self.stddev == 0:
             return distances
-        return [(x - self.mean) / self.stddev for x in distances]
+        return [
+            (x - self.mean) / self.stddev * self.scale_factor + self.bias
+            for x in distances
+        ]
 
     def _update(self, distances: list[float]):
         for x in distances:
diff --git a/web/src/views/search/SearchView.tsx b/web/src/views/search/SearchView.tsx
index 7e77c20b8d..27090bb825 100644
--- a/web/src/views/search/SearchView.tsx
+++ b/web/src/views/search/SearchView.tsx
@@ -189,19 +189,9 @@ export default function SearchView({
 
 
   // confidence score - probably needs tweaking
-  const zScoreToConfidence = (score: number, source: string) => {
-    let midpoint, scale;
-
-    if (source === "thumbnail") {
-      midpoint = 2;
-      scale = 0.5;
-    } else {
-      midpoint = 0.5;
-      scale = 1.5;
-    }
-
+  const zScoreToConfidence = (score: number) => {
     // Sigmoid function: 1 / (1 + e^x)
-    const confidence = 1 / (1 + Math.exp((score - midpoint) * scale));
+    const confidence = 1 / (1 + Math.exp(score));
     return Math.round(confidence * 100);
   };
 
@@ -412,21 +402,13 @@ export default function SearchView({
                    ) : (
                      <LuText className="size-3" />
                    )}
-                    {zScoreToConfidence(
-                      value.search_distance,
-                      value.search_source,
-                    )}
-                    %
+                    {zScoreToConfidence(value.search_distance)}%
                  </Chip>
                </TooltipTrigger>
                <TooltipPortal>
                  <TooltipContent>
                    Matched {value.search_source} at{" "}
-                    {zScoreToConfidence(
-                      value.search_distance,
-                      value.search_source,
-                    )}
-                    %
+                    {zScoreToConfidence(value.search_distance)}%
                  </TooltipContent>
                </TooltipPortal>
              </Tooltip>