From 5d1afa649672972dc48b186335189808de7a6043 Mon Sep 17 00:00:00 2001
From: Jonathan Schneider
Date: Thu, 12 Nov 2020 13:29:27 +0100
Subject: [PATCH 1/2] Fix misleading help text

see https://stackoverflow.com/questions/52099379/mallet-hyperparameter-optimization
---
 src/cc/mallet/topics/tui/TopicTrainer.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/cc/mallet/topics/tui/TopicTrainer.java b/src/cc/mallet/topics/tui/TopicTrainer.java
index 951bf37e9..b00841906 100644
--- a/src/cc/mallet/topics/tui/TopicTrainer.java
+++ b/src/cc/mallet/topics/tui/TopicTrainer.java
@@ -147,7 +147,7 @@ public class TopicTrainer {
 		"The number of iterations to run before first estimating dirichlet hyperparameters.", null);
 
 	static CommandOption.Boolean useSymmetricAlpha = new CommandOption.Boolean(TopicTrainer.class, "use-symmetric-alpha", "true|false", false, false,
-		"Only optimize the concentration parameter of the prior over document-topic distributions. This may reduce the number of very small, poorly estimated topics, but may disperse common words over several topics.", null);
+		"Only optimize the concentration parameter of the prior over topic-words distributions. This may reduce the number of very small, poorly estimated topics, but may disperse common words over several topics.", null);
 
 	static CommandOption.Double alpha = new CommandOption.Double(TopicTrainer.class, "alpha", "DECIMAL", true, 5.0,
 		"SumAlpha parameter: sum over topics of smoothing over doc-topic distributions. alpha_k = [this value] / [num topics]",null);

From fe6838add525b1a1d99c8985621335b70200ed0e Mon Sep 17 00:00:00 2001
From: Jonathan Schneider
Date: Wed, 19 May 2021 18:29:48 +0200
Subject: [PATCH 2/2] Improve misleading help text
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Dirichlet Distribution := Distribution over K-dimensional positive vectors that sum to one (i.e., points on the probability simplex)

Two parameters:
- Base measure (positive vector; sums to one)
- Concentration parameter α (positive scalar)

See https://people.cs.umass.edu/~wallach/talks/priors.pdf
---
 src/cc/mallet/topics/tui/TopicTrainer.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/cc/mallet/topics/tui/TopicTrainer.java b/src/cc/mallet/topics/tui/TopicTrainer.java
index b00841906..80ecff742 100644
--- a/src/cc/mallet/topics/tui/TopicTrainer.java
+++ b/src/cc/mallet/topics/tui/TopicTrainer.java
@@ -147,7 +147,7 @@ public class TopicTrainer {
 		"The number of iterations to run before first estimating dirichlet hyperparameters.", null);
 
 	static CommandOption.Boolean useSymmetricAlpha = new CommandOption.Boolean(TopicTrainer.class, "use-symmetric-alpha", "true|false", false, false,
-		"Only optimize the concentration parameter of the prior over topic-words distributions. This may reduce the number of very small, poorly estimated topics, but may disperse common words over several topics.", null);
+		"Optimize the concentration parameter (SumAlpha) of the prior over document-topic distributions while keeping it symmetric. This may reduce the number of very small, poorly estimated topics, but may disperse common words over several topics.", null);
 
 	static CommandOption.Double alpha = new CommandOption.Double(TopicTrainer.class, "alpha", "DECIMAL", true, 5.0,
 		"SumAlpha parameter: sum over topics of smoothing over doc-topic distributions. alpha_k = [this value] / [num topics]",null);