From 0072ad7c5e978bb19e7ba9ecb601adc24eeda642 Mon Sep 17 00:00:00 2001 From: Jonathan Schneider Date: Fri, 21 May 2021 18:26:29 +0200 Subject: [PATCH] Add reference to Hanna Wallach's Ph.D. thesis see http://dirichlet.net/pdf/wallach08structured.pdf --- src/cc/mallet/types/Dirichlet.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/cc/mallet/types/Dirichlet.java b/src/cc/mallet/types/Dirichlet.java index 4f73d61a2..857136a5a 100644 --- a/src/cc/mallet/types/Dirichlet.java +++ b/src/cc/mallet/types/Dirichlet.java @@ -555,6 +555,8 @@ public static void testSymmetricConcentration(int numDimensions, int numObservat /** * Learn Dirichlet parameters using frequency histograms + * described by Hanna Wallach in "Structured Topic Models for Language" (2008), section 2.4 + * Method 1: Using the Digamma Recurrence Relation (pp. 27-28) * * @param parameters A reference to the current values of the parameters, which will be updated in place * @param observations An array of count histograms. observations[10][3] could be the number of documents that contain exactly 3 tokens of word type 10. @@ -571,6 +573,8 @@ public static double learnParameters(double[] parameters, /** * Learn Dirichlet parameters using frequency histograms + * described by Hanna Wallach in "Structured Topic Models for Language" (2008), section 2.4 + * Method 1: Using the Digamma Recurrence Relation (pp. 27-28) and gamma hyperpriors (section 2.5, pp. 37-39) * * @param parameters A reference to the current values of the parameters, which will be updated in place * @param observations An array of count histograms. observations[10][3] could be the number of documents that contain exactly 3 tokens of word type 10.