From 82daf6d29ac3b556a77e792c1339bbe941e35bd9 Mon Sep 17 00:00:00 2001 From: Jigsaw Date: Mon, 3 Feb 2025 23:07:18 -0500 Subject: [PATCH] Add stats utilities for disagree consensus (high disagreement across groups) GitOrigin-RevId: 7c3cf87ed007ae56d9bc81e39ca40f3c0549aa5c --- src/stats_util.test.ts | 67 ++++++++++++++++++++++ src/stats_util.ts | 60 ++++++++++++++++--- src/tasks/summarization_subtasks/topics.ts | 2 +- 3 files changed, 121 insertions(+), 8 deletions(-) diff --git a/src/stats_util.test.ts b/src/stats_util.test.ts index 5412e7a..0e51aae 100644 --- a/src/stats_util.test.ts +++ b/src/stats_util.test.ts @@ -18,6 +18,9 @@ import { getAgreeProbability, getGroupInformedConsensus, getGroupAgreeProbDifference, + getDisagreeProbability, + getGroupInformedDisagreeConsensus, + getMinDisagreeProb, getMinAgreeProb, } from "./stats_util"; import { Comment } from "./types"; @@ -126,6 +129,70 @@ describe("stats utility functions", () => { ).toBeCloseTo(3 / 11); }); + it("should get the disagree probability for a given vote tally", () => { + expect( + getDisagreeProbability({ agreeCount: 10, disagreeCount: 5, passCount: 5, totalCount: 20 }) + ).toBeCloseTo((5 + 1) / (20 + 2)); + }); + + it("should handle vote tallies with zero counts", () => { + expect(getDisagreeProbability({ agreeCount: 0, disagreeCount: 0, totalCount: 0 })).toBeCloseTo( + 0.5 + ); + expect(getDisagreeProbability({ agreeCount: 0, disagreeCount: 5, totalCount: 5 })).toBeCloseTo( + 6 / 7 + ); + expect(getDisagreeProbability({ agreeCount: 5, disagreeCount: 0, totalCount: 5 })).toBeCloseTo( + 1 / 7 + ); + }); + + it("should get the group informed disagree consensus for a given comment", () => { + expect( + getGroupInformedDisagreeConsensus({ + id: "1", + text: "comment1", + voteTalliesByGroup: { + "0": { + agreeCount: 5, + disagreeCount: 10, + passCount: 0, + totalCount: 15, + }, + "1": { + agreeCount: 10, + disagreeCount: 5, + passCount: 5, + totalCount: 20, + }, + }, + }) + ).toBeCloseTo(((11 / 17) * 6) / 22); 
+ }); + + it("should get the minimum disagree probability across groups for a given comment", () => { + expect( + getMinDisagreeProb({ + id: "1", + text: "comment1", + voteTalliesByGroup: { + "0": { + agreeCount: 5, + disagreeCount: 10, + passCount: 0, + totalCount: 15, + }, + "1": { + agreeCount: 10, + disagreeCount: 5, + passCount: 5, + totalCount: 20, + }, + }, + }) + ).toBeCloseTo(3 / 11); + }); + it("should get the group agree difference for a given comment and group", () => { expect( getGroupAgreeProbDifference( diff --git a/src/stats_util.ts b/src/stats_util.ts index 14eaa2a..25e6616 100644 --- a/src/stats_util.ts +++ b/src/stats_util.ts @@ -18,7 +18,7 @@ import { Comment, CommentWithVoteTallies, isCommentWithVoteTalliesType, VoteTall import { groupCommentsBySubtopic } from "./sensemaker_utils"; /** - * A function which returns the estimated aggree probability for a given vote tally entry as a MAP estimate + * Compute the MAP probability estimate of an agree vote for a given vote tally entry. */ export function getAgreeProbability(voteTally: VoteTally): number { const totalCount = voteTally.agreeCount + voteTally.disagreeCount + (voteTally.passCount || 0); @@ -29,8 +29,8 @@ export function getAgreeProbability(voteTally: VoteTally): number { } /** - * A function which computes group informed consensus for the given set of vote tallies, given vote tally data, aggregated by some groupBy factor. - * Computed as the product of the aggree probabilities + * Computes group informed (agree) consensus for a comment's vote tallies, + * computed as the product of the agree probabilities across groups. 
*/ export function getGroupInformedConsensus(comment: CommentWithVoteTallies): number { return Object.values(comment.voteTalliesByGroup).reduce( @@ -46,6 +46,35 @@ export function getMinAgreeProb(comment: CommentWithVoteTallies): number { return Math.min(...Object.values(comment.voteTalliesByGroup).map(getAgreeProbability)); } +/** + * Compute the MAP probability estimate of a disagree vote for a given vote tally entry. + */ +export function getDisagreeProbability(voteTally: VoteTally): number { + const totalCount = voteTally.agreeCount + voteTally.disagreeCount + (voteTally.passCount || 0); + // We add +1 and +2 to the numerator and denominator respectively as a pseudo-count prior so that probabilities tend to 1/2 in the + // absence of data, and to avoid division/multiplication by zero in group informed consensus and risk ratio calculations. This is technically + // a simple maximum a posteriori (MAP) probability estimate. + return (voteTally.disagreeCount + 1) / (totalCount + 2); +} + +/** + * Computes group informed (disagree) consensus for a comment's vote tallies, + * computed as the product of disagree probabilities across groups. + */ +export function getGroupInformedDisagreeConsensus(comment: CommentWithVoteTallies): number { + return Object.values(comment.voteTalliesByGroup).reduce( + (product, voteTally) => product * getDisagreeProbability(voteTally), + 1 + ); +} + +/** + * A function which returns the minimum disagree probability across groups + */ +export function getMinDisagreeProb(comment: CommentWithVoteTallies): number { + return Math.min(...Object.values(comment.voteTalliesByGroup).map(getDisagreeProbability)); +} + /** * Computes the difference between the MAP probability estimate of agreeing within * a given group as compared with the rest of the conversation. 
@@ -117,7 +146,7 @@ export function getCommentVoteCount(comment: Comment): number { */ export class SummaryStats { comments: Comment[]; - minAgreeProbCommonGround = 0.6; + minCommonGroundProb = 0.6; minAgreeProbDifference = 0.3; maxSampleSize = 5; constructor(comments: Comment[]) { @@ -265,7 +294,24 @@ export class GroupedSummaryStats extends SummaryStats { (comment) => getGroupInformedConsensus(comment), k, // Before using Group Informed Consensus a minimum bar of agreement between groups is enforced - (comment: CommentWithVoteTallies) => getMinAgreeProb(comment) >= this.minAgreeProbCommonGround + (comment: CommentWithVoteTallies) => getMinAgreeProb(comment) >= this.minCommonGroundProb + ); + } + + /** + * Gets the topK disagreed upon comments across all groups. + * + * This is measured via the getGroupInformedDisagreeConsensus metric, subject to the constraints of + * this.minVoteCount and this.minCommonGroundProb settings. + * @param k defaults to this.maxSampleSize + * @returns the top disagreed on comments + */ + getCommonGroundDisagreeComments(k: number = this.maxSampleSize) { + return this.topK( + (comment) => getGroupInformedDisagreeConsensus(comment), + k, + // Before using Group Informed Disagree Consensus a minimum bar of disagreement between groups is enforced + (comment: CommentWithVoteTallies) => getMinDisagreeProb(comment) >= this.minCommonGroundProb ); } @@ -282,7 +328,7 @@ export class GroupedSummaryStats extends SummaryStats { (comment: CommentWithVoteTallies) => getGroupAgreeProbDifference(comment, group), k, (comment: CommentWithVoteTallies) => - getMinAgreeProb(comment) < this.minAgreeProbCommonGround && + getMinAgreeProb(comment) < this.minCommonGroundProb && getGroupAgreeProbDifference(comment, group) > this.minAgreeProbDifference ); } @@ -305,7 +351,7 @@ export class GroupedSummaryStats extends SummaryStats { (comment: CommentWithVoteTallies) => // Some group must agree with the comment less than the minAgreeProbCommonGround // threshold, so that 
this comment doesn't also qualify as a common ground comment. - getMinAgreeProb(comment) < this.minAgreeProbCommonGround && + getMinAgreeProb(comment) < this.minCommonGroundProb && // Some group must disagree with the rest by a margin larger than the // getGroupAgreeProbDifference. getMaxGroupAgreeProbDifference(comment) < this.minAgreeProbDifference diff --git a/src/tasks/summarization_subtasks/topics.ts b/src/tasks/summarization_subtasks/topics.ts index 8718589..d7c8d25 100644 --- a/src/tasks/summarization_subtasks/topics.ts +++ b/src/tasks/summarization_subtasks/topics.ts @@ -194,7 +194,7 @@ ${otherCommentsTable} const commonGroundComments = this.input.getCommonGroundComments(); const nComments = commonGroundComments.length; if (nComments === 0) { - return `No comments met the thresholds necessary to be considered as a point of common ground (at least ${this.input.minVoteCount} votes, and at least ${decimalToPercent(this.input.minAgreeProbCommonGround)} agreement across groups).`; + return `No comments met the thresholds necessary to be considered as a point of common ground (at least ${this.input.minVoteCount} votes, and at least ${decimalToPercent(this.input.minCommonGroundProb)} agreement across groups).`; } else { const summary = this.model.generateText( getPrompt(