From d6970280a1e3c0e968c26bfbc859fb519fcf5d8a Mon Sep 17 00:00:00 2001
From: Nick O'Ferrall
Date: Wed, 12 Feb 2025 17:43:45 +0000
Subject: [PATCH] update jaccard comments

---
 .../client/utils/smartGroup/computeJaccardDistanceMatrix.ts | 3 +--
 packages/client/utils/smartGroup/groupReflections.ts | 1 -
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/packages/client/utils/smartGroup/computeJaccardDistanceMatrix.ts b/packages/client/utils/smartGroup/computeJaccardDistanceMatrix.ts
index ca3c575cf65..a5654388ea5 100644
--- a/packages/client/utils/smartGroup/computeJaccardDistanceMatrix.ts
+++ b/packages/client/utils/smartGroup/computeJaccardDistanceMatrix.ts
@@ -1,5 +1,4 @@
 /**
- * Example: computeJaccardDistanceMatrix(reflections)
  * For each pair of reflections, measure how many words they share
  * vs. how many unique words total. (Jaccard similarity = intersection/union).
  * We then use distance = 1 - similarity and feed that to clusterfck.
@@ -28,7 +27,7 @@ const jaccardDistance = (aTokens: string[], bTokens: string[]): number => {
   const setB = new Set(bTokens)
   const intersectionSize = [...setA].filter((x) => setB.has(x)).length
   const unionSize = new Set([...setA, ...setB]).size
-  if (unionSize === 0) return 0 // edge case, treat empty union as identical
+  if (unionSize === 0) return 0 // edge case, treat empty union as identical although reflections should always have text
   const similarity = intersectionSize / unionSize
   const distance = 1 - similarity
   return distance
diff --git a/packages/client/utils/smartGroup/groupReflections.ts b/packages/client/utils/smartGroup/groupReflections.ts
index 4a4d59a21e0..4f5b64cbaf3 100644
--- a/packages/client/utils/smartGroup/groupReflections.ts
+++ b/packages/client/utils/smartGroup/groupReflections.ts
@@ -14,7 +14,6 @@ const groupReflections = <
   groupingOptions: GroupingOptions
 ) => {
   const reflectionTexts = reflections.map((r) => r.plaintextContent || '')
-
   const distanceMatrix = computeJaccardDistanceMatrix(reflectionTexts)
   const {