More logical version of ql_nb_steps; test more precisions when profiling

flintlib · Jan 28, 2025 · 8c907d3 · 8c907d3
1 parent cbc50fc
commit 8c907d3
Show file tree

Hide file tree

Showing 2 changed files with 96 additions and 46 deletions.
diff --git a/src/acb_theta/profile/p-ql_exact.c b/src/acb_theta/profile/p-ql_exact.c
@@ -10,6 +10,7 @@
 */
 
 #include <stdlib.h>
+#include <math.h>
 #include "profiler.h"
 #include "ulong_extras.h"
 #include "acb.h"
@@ -79,7 +80,7 @@ int main(int argc, char * argv[])
     acb_mat_printd(tau, 5);
     _acb_vec_printd(z + g, g, 5);
 
-    for (prec = pmin; prec <= pmax; prec *= 2)
+    for (prec = pmin; prec <= pmax; prec = ceil(1.3 * prec))
     {
         acb_theta_ql_nb_steps(pattern, tau, cst, prec);
 

diff --git a/src/acb_theta/ql_nb_steps.c b/src/acb_theta/ql_nb_steps.c
@@ -23,7 +23,7 @@ acb_theta_ql_nb_steps(slong * pattern, const acb_mat_t tau, int cst, slong prec)
     slong lp = ACB_THETA_LOW_PREC;
     arb_mat_t cho, yinv;
     arb_t x, t;
-    slong s, nb;
+    slong s, nb, j, nb_max;
 
     arb_init(x);
     arb_init(t);
@@ -32,6 +32,10 @@ acb_theta_ql_nb_steps(slong * pattern, const acb_mat_t tau, int cst, slong prec)
 
     acb_siegel_cho_yinv(cho, yinv, tau, lp);
 
+    /* Compute rough pattern (could be negative) */
+    /* Note: we could be more precise by scaling x by something else than
+       powers of 2. */
+    nb_max = 0;
     for (s = 0; s < g; s++)
     {
         arb_sqr(x, arb_mat_entry(cho, s, s), lp);
@@ -48,67 +52,112 @@ acb_theta_ql_nb_steps(slong * pattern, const acb_mat_t tau, int cst, slong prec)
             return 0;
         }
 
-        nb =  -arf_get_si(arb_midref(x), ARF_RND_NEAR);
-
-        /* Adapt pattern in light of experimental performance */
-        /* See /path/to/flint/build/acb_theta/profile/p-acb_theta_ql_exact */
+        nb = -arf_get_si(arb_midref(x), ARF_RND_NEAR);
         if (s == 0)
         {
-            /* Rationale is: acb_modular_theta_sum is so fast that we don't
-               need so many duplication steps. */
-            if (g == 1 && cst)
-            {
-                nb -= 8;
-            }
-            else
+            /* acb_modular_theta_sum is so fast that we don't need so many
+               duplication steps. */
+            nb -= 5;
+        }
+        else if (s == 1)
+        {
+            /* summation in genus 2 is also quite efficient. */
+            nb -= 3;
+        }
+        else if (s == 2)
+        {
+            nb -= 1;
+        }
+        else
+        {
+            nb += 1;
+        }
+        /* One less step if at least 9 for more balance with summation phase */
+        if (nb > 8)
+        {
+            nb -= 1;
+        }
+
+        pattern[s] = nb;
+        nb_max = FLINT_MAX(nb_max, nb);
+    }
+
+    /* Start adapting the pattern from s = g-1 downwards. This is because the
+      choice of whether to trigger dimension-lowering formulas in low
+      dimensions will depend on whether or not duplications/dimension-lowerings
+      have already been applied. */
+    /* See /path/to/flint/build/acb_theta/profile/p-acb_theta_ql_exact */
+    for (s = g - 1; s >= 0; s--)
+    {
+        /* Find out how many duplication steps have been performed so far
+           (could be negative) */
+        nb = -10;
+        for (j = s + 1; j < g; j++)
+        {
+            nb = FLINT_MAX(nb, pattern[j]);
+        }
+
+        /* Force trigger dimension-lowering at that point ? We only do this if
+           nb is negative as the ellipsoid really contains very few points. */
+        if (nb < 0 && (s == 0 || pattern[s] == 0))
+        {
+            pattern[s] = FLINT_MAX(1, pattern[s]);
+        }
+
+        /* Force more duplication steps ? We only do this if there will be a
+           dimension-lowering later on. */
+        if (pattern[s] < nb_max)
+        {
+            pattern[s] = FLINT_MIN(nb_max - 1, pattern[s] + 3);
+        }
+
+        /* Avoid making any duplication steps ? We only do this if the
+           suggested number of steps for this s and the total number of steps
+           are small. */
+        if (nb == 0 && pattern[s] <= 2)
+        {
+            if (s >= 2 && nb_max <= 1)
             {
-                nb -= 5;
+                for (j = 0; j <= s; j++)
+                {
+                    pattern[s] = 0;
+                }
             }
-            /* One less step if at least 9 */
-            /* Rationale is: more balance with summation phase */
-            if (nb > 8)
+            else if (s == 1 && nb_max + pattern[s] <= 5)
             {
-                nb -= 1;
+                pattern[0] = FLINT_MAX(pattern[0] - 2, 0);
+                pattern[s] == 0;
             }
-            /* Never make <= 2 duplication steps. */
-            /* Rationale is: this avoids having the slight overhead of
-               computing distances, etc. */
-            if (nb <= 2)
+            else if (s == 0)
             {
-                nb = 0;
+                pattern[s] = 0;
             }
         }
-        if (s == 1)
+        /* In the case of genus 1 theta constants, we are more aggressive in
+           avoiding duplication, because acb_modular_theta_sum is faster for constants */
+        if (s == 0 && nb == 0 && cst)
         {
-            /* Rationale is: summation in genus 2 is also quite efficient. */
-            nb -= 2;
-            if (nb < pattern[0])
+            pattern[0] = pattern[0] - 3;
+            if (pattern[0] <= 2)
             {
-                /* Rationale is: if we use the dimension-lowering strategy, then
-                   one more duplication step will nicely decrease the number of
-                   auxiliary points. */
-                nb = FLINT_MIN(pattern[0] - 1, nb + 2);
-            }
-            /* Never make <= 2 duplication steps. */
-            /* Rationale is: this avoids having the slight overhead of
-               computing distances, etc. */
-            if (nb <= 2)
-            {
-                nb = 0;
+                pattern[0] = 0;
             }
         }
+        /* One less duplication step in for s = 0 if it doesn't mess with
+           dimension lowering */
+        if (s == 0 && pattern[0] > nb + 1)
+        {
+            pattern[0] -= 1;
+        }
+
+        /* Make pattern a nonincreasing vector */
         pattern[s] = FLINT_MAX(0, nb);
     }
 
-    /* Clean up: make pattern a nonincreasing vector */
-    for (s = g - 1; s >= 1; s--)
-    {
-        pattern[s - 1] = FLINT_MAX(pattern[s - 1], pattern[s]);
-    }
-    /* This seems to work experimentally. */
-    if (g >= 2 && pattern[1] == 0)
+    /* Clean up: make pattern a nonnegative vector */
+    for (s = 0; s < g; s++)
     {
-        pattern[0] = FLINT_MAX(0, pattern[0] - 1);
+        pattern[s] = FLINT_MAX(0, pattern[s]);
     }
 
     arb_clear(x);