From 846cc691c6e115ebfba005de164797a4fdb56499 Mon Sep 17 00:00:00 2001
From: Clayton Thorrez
Date: Sun, 22 Sep 2024 20:29:17 -0700
Subject: [PATCH] add tqdm and num-cpu

---
 fastchat/serve/monitor/elo_analysis.py   | 11 +++++++++--
 fastchat/serve/monitor/rating_systems.py | 21 +++++++++++++++------
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/fastchat/serve/monitor/elo_analysis.py b/fastchat/serve/monitor/elo_analysis.py
index aeb4ec372e..6a16c92184 100644
--- a/fastchat/serve/monitor/elo_analysis.py
+++ b/fastchat/serve/monitor/elo_analysis.py
@@ -332,6 +332,7 @@ def report_elo_analysis_results(
     scale=1,
     filter_func=lambda x: True,
     style_control=False,
+    num_cpu=None,
 ):
     battles = pd.DataFrame(battles_json)
 
@@ -378,10 +379,14 @@
             )
             elo_rating_final, coef_final = compute_style_control(battles)
         else:
-            bootstrap_df = compute_bootstrap_bt(battles, num_round=num_bootstrap)
+            bootstrap_df = compute_bootstrap_bt(
+                battles, num_round=num_bootstrap, num_cpu=num_cpu
+            )
             elo_rating_final = compute_bt(battles)
     elif rating_system == "elo":
-        bootstrap_df = compute_bootstrap_elo(battles, num_round=num_bootstrap)
+        bootstrap_df = compute_bootstrap_elo(
+            battles, num_round=num_bootstrap, num_cpu=num_cpu
+        )
         elo_rating_median = get_median_elo_from_bootstrap(bootstrap_df)
         elo_rating_final = elo_rating_median
 
@@ -485,6 +490,7 @@ def pretty_print_elo_rating(rating):
     parser.add_argument("--category", nargs="+", default=["full"])
     parser.add_argument("--scale", type=float, default=1)
     parser.add_argument("--style-control", action="store_true")
+    parser.add_argument("--num-cpu", type=int, default=12)
     args = parser.parse_args()
 
     np.random.seed(42)
@@ -523,6 +529,7 @@ def pretty_print_elo_rating(rating):
         scale=args.scale,
         filter_func=filter_func,
         style_control=args.style_control,
+        num_cpu=args.num_cpu,
     )
 
     for cat in args.category:
diff --git a/fastchat/serve/monitor/rating_systems.py b/fastchat/serve/monitor/rating_systems.py
index 8711fba2ea..6dda5b5e62 100644
--- a/fastchat/serve/monitor/rating_systems.py
+++ b/fastchat/serve/monitor/rating_systems.py
@@ -6,6 +6,7 @@
 from scipy.special import expit
 from scipy.optimize import minimize
 import pandas as pd
+from tqdm import tqdm
 
 
 STYLE_CONTROL_ELEMENTS_V1 = [
@@ -217,7 +218,13 @@ def compute_bt(df, base=10.0, scale=400.0, init_rating=1000, tol=1e-6):
 
 
 def compute_bootstrap_bt(
-    battles, num_round, base=10.0, scale=400.0, init_rating=1000.0, tol=1e-6
+    battles,
+    num_round,
+    base=10.0,
+    scale=400.0,
+    init_rating=1000.0,
+    tol=1e-6,
+    num_cpu=None,
 ):
     matchups, outcomes, models, weights = preprocess_for_bt(battles)
     # bootstrap sample the unique outcomes and their counts directly using the multinomial distribution
@@ -232,8 +239,8 @@ def compute_bootstrap_bt(
     bt_fn = partial(
         fit_bt, matchups, outcomes, n_models=len(models), alpha=np.log(base), tol=tol
     )
-    with mp.Pool(os.cpu_count()) as pool:
-        results = pool.map(bt_fn, boot_weights)
+    with mp.Pool(num_cpu if num_cpu else os.cpu_count()) as pool:
+        results = list(tqdm(pool.imap_unordered(bt_fn, boot_weights), total=num_round))
 
     ratings = np.array(results)
     scaled_ratings = scale_and_offset(ratings, models, scale, init_rating)
@@ -346,6 +353,7 @@ def compute_bootstrap_style_control(
     init_rating=1000.0,
     scale=400.0,
     tol=1e-6,
+    num_cpu=None,
 ):
     matchups, features, outcomes, models = preprocess_for_style(df)
 
@@ -364,9 +372,10 @@
         low=0, high=matchups.shape[0], size=(num_round, matchups.shape[0])
     )
 
-    # this one is still memory and cpu intensive so don't make too many processes
-    with mp.Pool(4) as pool:
-        results = pool.map(contextual_bt_fn, boot_idxs)
+    with mp.Pool(num_cpu if num_cpu else os.cpu_count()) as pool:
+        results = list(
+            tqdm(pool.imap_unordered(contextual_bt_fn, boot_idxs), total=num_round)
+        )
 
     ratings_params = np.array(results)
     ratings = ratings_params[:, : len(models)]
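
Review notes:

As a quick illustration for reviewers, here is a minimal, self-contained
sketch of the num_cpu/tqdm pattern this patch threads through both bootstrap
functions. The toy_fit_fn workload and the argument values are hypothetical
stand-ins for fit_bt / fit_contextual_bt and the real bootstrap samples; only
the pool sizing and progress wiring mirror the patched code.

    import os
    import multiprocessing as mp

    import numpy as np
    from tqdm import tqdm


    def toy_fit_fn(weights):
        # Hypothetical stand-in for fit_bt: any picklable one-argument function.
        return float(np.sum(weights))


    def run_bootstrap(boot_weights, num_round, num_cpu=None):
        # Fall back to all available cores when num_cpu is None, matching the
        # "num_cpu if num_cpu else os.cpu_count()" idiom used in the patch.
        with mp.Pool(num_cpu if num_cpu else os.cpu_count()) as pool:
            # imap_unordered yields results as workers finish, which lets tqdm
            # report live progress; ordering is irrelevant here because the
            # downstream bootstrap statistics (medians/quantiles per model)
            # do not depend on which round produced which ratings vector.
            results = list(
                tqdm(pool.imap_unordered(toy_fit_fn, boot_weights), total=num_round)
            )
        return np.array(results)


    if __name__ == "__main__":
        num_round = 100
        boot_weights = [np.random.rand(16) for _ in range(num_round)]
        print(run_bootstrap(boot_weights, num_round, num_cpu=4).shape)

Two things worth double-checking before merge:

1. The switch from pool.map to pool.imap_unordered drops the round-to-result
   ordering. That is harmless for the aggregate statistics computed downstream,
   but would matter if a caller ever needed to pair each ratings vector with
   its bootstrap sample.
2. elo_analysis.py now passes num_cpu=num_cpu to compute_bootstrap_elo, but
   this diff does not touch that function's signature in rating_systems.py.
   If compute_bootstrap_elo does not already accept a num_cpu keyword, the
   "elo" branch will raise a TypeError.

The new knob is exposed on the CLI as --num-cpu (default 12), e.g.
python3 fastchat/serve/monitor/elo_analysis.py --num-cpu 8, together with
whatever input flags the script already requires.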